From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- tools/profiler/core/EHABIStackWalk.cpp | 597 ++ tools/profiler/core/EHABIStackWalk.h | 28 + tools/profiler/core/MicroGeckoProfiler.cpp | 203 + tools/profiler/core/PageInformation.cpp | 44 + tools/profiler/core/PageInformation.h | 68 + tools/profiler/core/PlatformMacros.h | 130 + tools/profiler/core/PowerCounters-linux.cpp | 287 + tools/profiler/core/PowerCounters-mac-amd64.cpp | 419 ++ tools/profiler/core/PowerCounters-mac-arm64.cpp | 47 + tools/profiler/core/PowerCounters-win.cpp | 342 + tools/profiler/core/PowerCounters.h | 52 + .../profiler/core/ProfileAdditionalInformation.cpp | 102 + tools/profiler/core/ProfileBuffer.cpp | 243 + tools/profiler/core/ProfileBuffer.h | 260 + tools/profiler/core/ProfileBufferEntry.cpp | 2321 +++++++ tools/profiler/core/ProfileBufferEntry.h | 532 ++ tools/profiler/core/ProfiledThreadData.cpp | 455 ++ tools/profiler/core/ProfiledThreadData.h | 250 + tools/profiler/core/ProfilerBacktrace.cpp | 101 + tools/profiler/core/ProfilerBacktrace.h | 184 + tools/profiler/core/ProfilerBindings.cpp | 386 ++ tools/profiler/core/ProfilerCodeAddressService.cpp | 75 + tools/profiler/core/ProfilerMarkers.cpp | 32 + tools/profiler/core/ProfilerThreadRegistration.cpp | 198 + .../core/ProfilerThreadRegistrationData.cpp | 303 + tools/profiler/core/ProfilerThreadRegistry.cpp | 40 + tools/profiler/core/ProfilerUtils.cpp | 118 + tools/profiler/core/VTuneProfiler.cpp | 80 + tools/profiler/core/VTuneProfiler.h | 78 + tools/profiler/core/memory_hooks.cpp | 632 ++ tools/profiler/core/memory_hooks.h | 25 + tools/profiler/core/platform-linux-android.cpp | 636 ++ tools/profiler/core/platform-macos.cpp | 297 + tools/profiler/core/platform-win32.cpp | 496 ++ tools/profiler/core/platform.cpp | 7067 ++++++++++++++++++++ tools/profiler/core/platform.h | 381 ++ 
tools/profiler/core/shared-libraries-linux.cc | 280 + tools/profiler/core/shared-libraries-macos.cc | 211 + tools/profiler/core/shared-libraries-win32.cc | 167 + tools/profiler/core/vtune/ittnotify.h | 4123 ++++++++++++ tools/profiler/docs/buffer.rst | 70 + tools/profiler/docs/code-overview.rst | 1494 +++++ tools/profiler/docs/fissionprofiler-20200424.png | Bin 0 -> 131301 bytes tools/profiler/docs/fissionprofiler.umlet.uxf | 546 ++ tools/profiler/docs/index.rst | 37 + tools/profiler/docs/instrumenting-javascript.rst | 60 + tools/profiler/docs/instrumenting-rust.rst | 433 ++ tools/profiler/docs/markers-guide.rst | 485 ++ tools/profiler/docs/memory.rst | 46 + tools/profiler/docs/profilerclasses-20220913.png | Bin 0 -> 727313 bytes tools/profiler/docs/profilerclasses.umlet.uxf | 811 +++ .../docs/profilerthreadregistration-20220913.png | Bin 0 -> 383738 bytes .../docs/profilerthreadregistration.umlet.uxf | 710 ++ tools/profiler/gecko/ChildProfilerController.cpp | 170 + tools/profiler/gecko/PProfiler.ipdl | 44 + tools/profiler/gecko/ProfilerChild.cpp | 565 ++ .../profiler/gecko/ProfilerIOInterposeObserver.cpp | 216 + tools/profiler/gecko/ProfilerIOInterposeObserver.h | 32 + tools/profiler/gecko/ProfilerParent.cpp | 1002 +++ tools/profiler/gecko/ProfilerTypes.ipdlh | 43 + tools/profiler/gecko/components.conf | 17 + tools/profiler/gecko/nsIProfiler.idl | 208 + tools/profiler/gecko/nsProfiler.cpp | 1487 ++++ tools/profiler/gecko/nsProfiler.h | 117 + tools/profiler/gecko/nsProfilerCIID.h | 16 + tools/profiler/gecko/nsProfilerStartParams.cpp | 65 + tools/profiler/gecko/nsProfilerStartParams.h | 36 + tools/profiler/lul/AutoObjectMapper.cpp | 79 + tools/profiler/lul/AutoObjectMapper.h | 64 + tools/profiler/lul/LulCommon.cpp | 100 + tools/profiler/lul/LulCommonExt.h | 509 ++ tools/profiler/lul/LulDwarf.cpp | 2538 +++++++ tools/profiler/lul/LulDwarfExt.h | 1312 ++++ tools/profiler/lul/LulDwarfInt.h | 193 + tools/profiler/lul/LulDwarfSummariser.cpp | 549 ++ 
tools/profiler/lul/LulDwarfSummariser.h | 64 + tools/profiler/lul/LulElf.cpp | 887 +++ tools/profiler/lul/LulElfExt.h | 69 + tools/profiler/lul/LulElfInt.h | 218 + tools/profiler/lul/LulMain.cpp | 2079 ++++++ tools/profiler/lul/LulMain.h | 378 ++ tools/profiler/lul/LulMainInt.h | 631 ++ tools/profiler/lul/platform-linux-lul.cpp | 75 + tools/profiler/lul/platform-linux-lul.h | 19 + tools/profiler/moz.build | 227 + tools/profiler/public/ChildProfilerController.h | 71 + tools/profiler/public/GeckoProfiler.h | 435 ++ tools/profiler/public/GeckoProfilerReporter.h | 26 + tools/profiler/public/GeckoTraceEvent.h | 1060 +++ tools/profiler/public/MicroGeckoProfiler.h | 130 + .../profiler/public/ProfileAdditionalInformation.h | 90 + ...rofileBufferEntrySerializationGeckoExtensions.h | 160 + tools/profiler/public/ProfileJSONWriter.h | 19 + tools/profiler/public/ProfilerBindings.h | 162 + tools/profiler/public/ProfilerChild.h | 106 + tools/profiler/public/ProfilerCodeAddressService.h | 52 + tools/profiler/public/ProfilerControl.h | 190 + tools/profiler/public/ProfilerCounts.h | 296 + tools/profiler/public/ProfilerLabels.h | 268 + tools/profiler/public/ProfilerMarkerTypes.h | 41 + tools/profiler/public/ProfilerMarkers.h | 355 + tools/profiler/public/ProfilerMarkersDetail.h | 31 + .../profiler/public/ProfilerMarkersPrerequisites.h | 31 + tools/profiler/public/ProfilerParent.h | 119 + tools/profiler/public/ProfilerRunnable.h | 68 + tools/profiler/public/ProfilerRustBindings.h | 12 + tools/profiler/public/ProfilerState.h | 399 ++ tools/profiler/public/ProfilerThreadPlatformData.h | 80 + tools/profiler/public/ProfilerThreadRegistration.h | 367 + .../public/ProfilerThreadRegistrationData.h | 537 ++ .../public/ProfilerThreadRegistrationInfo.h | 64 + tools/profiler/public/ProfilerThreadRegistry.h | 321 + tools/profiler/public/ProfilerThreadSleep.h | 58 + tools/profiler/public/ProfilerThreadState.h | 128 + tools/profiler/public/ProfilerUtils.h | 32 + 
tools/profiler/public/shared-libraries.h | 213 + tools/profiler/rust-api/Cargo.toml | 23 + tools/profiler/rust-api/README.md | 5 + tools/profiler/rust-api/build.rs | 118 + tools/profiler/rust-api/cbindgen.toml | 15 + tools/profiler/rust-api/extra-bindgen-flags.in | 1 + tools/profiler/rust-api/macros/Cargo.toml | 13 + tools/profiler/rust-api/macros/src/lib.rs | 65 + tools/profiler/rust-api/src/gecko_bindings/glue.rs | 53 + tools/profiler/rust-api/src/gecko_bindings/mod.rs | 21 + .../src/gecko_bindings/profiling_categories.rs | 32 + tools/profiler/rust-api/src/json_writer.rs | 86 + tools/profiler/rust-api/src/label.rs | 137 + tools/profiler/rust-api/src/lib.rs | 29 + .../rust-api/src/marker/deserializer_tags_state.rs | 116 + tools/profiler/rust-api/src/marker/mod.rs | 284 + tools/profiler/rust-api/src/marker/options.rs | 138 + tools/profiler/rust-api/src/marker/schema.rs | 233 + tools/profiler/rust-api/src/profiler_state.rs | 78 + tools/profiler/rust-api/src/thread.rs | 23 + tools/profiler/rust-api/src/time.rs | 71 + tools/profiler/rust-helper/Cargo.toml | 23 + .../rust-helper/src/compact_symbol_table.rs | 40 + tools/profiler/rust-helper/src/elf.rs | 101 + tools/profiler/rust-helper/src/lib.rs | 107 + tools/profiler/tests/browser/browser.ini | 102 + .../browser/browser_test_feature_ipcmessages.js | 100 + .../browser/browser_test_feature_jsallocations.js | 74 + .../browser_test_feature_nostacksampling.js | 72 + .../browser/browser_test_marker_network_cancel.js | 71 + ...browser_test_marker_network_private_browsing.js | 91 + .../browser_test_marker_network_redirect.js | 341 + ...est_marker_network_serviceworker_cache_first.js | 378 ++ ...arker_network_serviceworker_no_fetch_handler.js | 218 + ...erviceworker_no_respondWith_in_fetch_handler.js | 294 + ...r_network_serviceworker_synthetized_response.js | 480 ++ .../browser/browser_test_marker_network_simple.js | 81 + .../browser/browser_test_marker_network_sts.js | 130 + .../tests/browser/browser_test_markers_gc_cc.js | 
49 + .../browser/browser_test_markers_parent_process.js | 37 + .../browser_test_markers_preferencereads.js | 73 + .../browser/browser_test_profile_capture_by_pid.js | 199 + .../tests/browser/browser_test_profile_fission.js | 191 + .../browser_test_profile_multi_frame_page_info.js | 83 + .../browser_test_profile_single_frame_page_info.js | 132 + .../browser/browser_test_profile_slow_capture.js | 104 + tools/profiler/tests/browser/do_work_500ms.html | 41 + .../tests/browser/firefox-logo-nightly.svg | 1 + tools/profiler/tests/browser/head.js | 159 + tools/profiler/tests/browser/multi_frame.html | 11 + .../tests/browser/page_with_resources.html | 11 + tools/profiler/tests/browser/redirect.sjs | 8 + .../serviceworkers/firefox-logo-nightly.svg | 1 + .../browser/serviceworkers/serviceworker-utils.js | 39 + .../serviceworkers/serviceworker_cache_first.js | 34 + .../serviceworker_no_fetch_handler.js | 4 + ...erviceworker_no_respondWith_in_fetch_handler.js | 9 + .../browser/serviceworkers/serviceworker_page.html | 10 + .../serviceworkers/serviceworker_register.html | 9 + .../serviceworkers/serviceworker_simple.html | 9 + .../serviceworker_synthetized_response.js | 27 + tools/profiler/tests/browser/simple.html | 9 + tools/profiler/tests/browser/single_frame.html | 10 + tools/profiler/tests/chrome/chrome.ini | 8 + tools/profiler/tests/chrome/profiler_test_utils.js | 66 + .../profiler/tests/chrome/test_profile_worker.html | 66 + .../chrome/test_profile_worker_bug_1428076.html | 58 + tools/profiler/tests/gtest/GeckoProfiler.cpp | 5099 ++++++++++++++ tools/profiler/tests/gtest/LulTest.cpp | 51 + tools/profiler/tests/gtest/LulTestDwarf.cpp | 2733 ++++++++ .../profiler/tests/gtest/LulTestInfrastructure.cpp | 498 ++ tools/profiler/tests/gtest/LulTestInfrastructure.h | 736 ++ tools/profiler/tests/gtest/ThreadProfileTest.cpp | 60 + tools/profiler/tests/gtest/moz.build | 45 + tools/profiler/tests/shared-head.js | 591 ++ tools/profiler/tests/xpcshell/head.js | 244 + 
.../tests/xpcshell/test_active_configuration.js | 115 + .../tests/xpcshell/test_addProfilerMarker.js | 221 + tools/profiler/tests/xpcshell/test_asm.js | 76 + .../tests/xpcshell/test_assertion_helper.js | 162 + tools/profiler/tests/xpcshell/test_enterjit_osr.js | 52 + .../tests/xpcshell/test_enterjit_osr_disabling.js | 14 + .../tests/xpcshell/test_enterjit_osr_enabling.js | 14 + .../tests/xpcshell/test_feature_fileioall.js | 159 + tools/profiler/tests/xpcshell/test_feature_java.js | 31 + tools/profiler/tests/xpcshell/test_feature_js.js | 63 + .../tests/xpcshell/test_feature_mainthreadio.js | 122 + .../xpcshell/test_feature_nativeallocations.js | 158 + .../tests/xpcshell/test_feature_stackwalking.js | 48 + tools/profiler/tests/xpcshell/test_get_features.js | 8 + .../profiler/tests/xpcshell/test_merged_stacks.js | 74 + tools/profiler/tests/xpcshell/test_pause.js | 126 + .../profiler/tests/xpcshell/test_responsiveness.js | 50 + tools/profiler/tests/xpcshell/test_run.js | 37 + .../profiler/tests/xpcshell/test_shared_library.js | 21 + tools/profiler/tests/xpcshell/test_start.js | 21 + tools/profiler/tests/xpcshell/xpcshell.ini | 72 + 212 files changed, 64315 insertions(+) create mode 100644 tools/profiler/core/EHABIStackWalk.cpp create mode 100644 tools/profiler/core/EHABIStackWalk.h create mode 100644 tools/profiler/core/MicroGeckoProfiler.cpp create mode 100644 tools/profiler/core/PageInformation.cpp create mode 100644 tools/profiler/core/PageInformation.h create mode 100644 tools/profiler/core/PlatformMacros.h create mode 100644 tools/profiler/core/PowerCounters-linux.cpp create mode 100644 tools/profiler/core/PowerCounters-mac-amd64.cpp create mode 100644 tools/profiler/core/PowerCounters-mac-arm64.cpp create mode 100644 tools/profiler/core/PowerCounters-win.cpp create mode 100644 tools/profiler/core/PowerCounters.h create mode 100644 tools/profiler/core/ProfileAdditionalInformation.cpp create mode 100644 tools/profiler/core/ProfileBuffer.cpp create mode 100644 
tools/profiler/core/ProfileBuffer.h create mode 100644 tools/profiler/core/ProfileBufferEntry.cpp create mode 100644 tools/profiler/core/ProfileBufferEntry.h create mode 100644 tools/profiler/core/ProfiledThreadData.cpp create mode 100644 tools/profiler/core/ProfiledThreadData.h create mode 100644 tools/profiler/core/ProfilerBacktrace.cpp create mode 100644 tools/profiler/core/ProfilerBacktrace.h create mode 100644 tools/profiler/core/ProfilerBindings.cpp create mode 100644 tools/profiler/core/ProfilerCodeAddressService.cpp create mode 100644 tools/profiler/core/ProfilerMarkers.cpp create mode 100644 tools/profiler/core/ProfilerThreadRegistration.cpp create mode 100644 tools/profiler/core/ProfilerThreadRegistrationData.cpp create mode 100644 tools/profiler/core/ProfilerThreadRegistry.cpp create mode 100644 tools/profiler/core/ProfilerUtils.cpp create mode 100644 tools/profiler/core/VTuneProfiler.cpp create mode 100644 tools/profiler/core/VTuneProfiler.h create mode 100644 tools/profiler/core/memory_hooks.cpp create mode 100644 tools/profiler/core/memory_hooks.h create mode 100644 tools/profiler/core/platform-linux-android.cpp create mode 100644 tools/profiler/core/platform-macos.cpp create mode 100644 tools/profiler/core/platform-win32.cpp create mode 100644 tools/profiler/core/platform.cpp create mode 100644 tools/profiler/core/platform.h create mode 100644 tools/profiler/core/shared-libraries-linux.cc create mode 100644 tools/profiler/core/shared-libraries-macos.cc create mode 100644 tools/profiler/core/shared-libraries-win32.cc create mode 100644 tools/profiler/core/vtune/ittnotify.h create mode 100644 tools/profiler/docs/buffer.rst create mode 100644 tools/profiler/docs/code-overview.rst create mode 100644 tools/profiler/docs/fissionprofiler-20200424.png create mode 100644 tools/profiler/docs/fissionprofiler.umlet.uxf create mode 100644 tools/profiler/docs/index.rst create mode 100644 tools/profiler/docs/instrumenting-javascript.rst create mode 100644 
tools/profiler/docs/instrumenting-rust.rst create mode 100644 tools/profiler/docs/markers-guide.rst create mode 100644 tools/profiler/docs/memory.rst create mode 100644 tools/profiler/docs/profilerclasses-20220913.png create mode 100644 tools/profiler/docs/profilerclasses.umlet.uxf create mode 100644 tools/profiler/docs/profilerthreadregistration-20220913.png create mode 100644 tools/profiler/docs/profilerthreadregistration.umlet.uxf create mode 100644 tools/profiler/gecko/ChildProfilerController.cpp create mode 100644 tools/profiler/gecko/PProfiler.ipdl create mode 100644 tools/profiler/gecko/ProfilerChild.cpp create mode 100644 tools/profiler/gecko/ProfilerIOInterposeObserver.cpp create mode 100644 tools/profiler/gecko/ProfilerIOInterposeObserver.h create mode 100644 tools/profiler/gecko/ProfilerParent.cpp create mode 100644 tools/profiler/gecko/ProfilerTypes.ipdlh create mode 100644 tools/profiler/gecko/components.conf create mode 100644 tools/profiler/gecko/nsIProfiler.idl create mode 100644 tools/profiler/gecko/nsProfiler.cpp create mode 100644 tools/profiler/gecko/nsProfiler.h create mode 100644 tools/profiler/gecko/nsProfilerCIID.h create mode 100644 tools/profiler/gecko/nsProfilerStartParams.cpp create mode 100644 tools/profiler/gecko/nsProfilerStartParams.h create mode 100644 tools/profiler/lul/AutoObjectMapper.cpp create mode 100644 tools/profiler/lul/AutoObjectMapper.h create mode 100644 tools/profiler/lul/LulCommon.cpp create mode 100644 tools/profiler/lul/LulCommonExt.h create mode 100644 tools/profiler/lul/LulDwarf.cpp create mode 100644 tools/profiler/lul/LulDwarfExt.h create mode 100644 tools/profiler/lul/LulDwarfInt.h create mode 100644 tools/profiler/lul/LulDwarfSummariser.cpp create mode 100644 tools/profiler/lul/LulDwarfSummariser.h create mode 100644 tools/profiler/lul/LulElf.cpp create mode 100644 tools/profiler/lul/LulElfExt.h create mode 100644 tools/profiler/lul/LulElfInt.h create mode 100644 tools/profiler/lul/LulMain.cpp create mode 
100644 tools/profiler/lul/LulMain.h create mode 100644 tools/profiler/lul/LulMainInt.h create mode 100644 tools/profiler/lul/platform-linux-lul.cpp create mode 100644 tools/profiler/lul/platform-linux-lul.h create mode 100644 tools/profiler/moz.build create mode 100644 tools/profiler/public/ChildProfilerController.h create mode 100644 tools/profiler/public/GeckoProfiler.h create mode 100644 tools/profiler/public/GeckoProfilerReporter.h create mode 100644 tools/profiler/public/GeckoTraceEvent.h create mode 100644 tools/profiler/public/MicroGeckoProfiler.h create mode 100644 tools/profiler/public/ProfileAdditionalInformation.h create mode 100644 tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h create mode 100644 tools/profiler/public/ProfileJSONWriter.h create mode 100644 tools/profiler/public/ProfilerBindings.h create mode 100644 tools/profiler/public/ProfilerChild.h create mode 100644 tools/profiler/public/ProfilerCodeAddressService.h create mode 100644 tools/profiler/public/ProfilerControl.h create mode 100644 tools/profiler/public/ProfilerCounts.h create mode 100644 tools/profiler/public/ProfilerLabels.h create mode 100644 tools/profiler/public/ProfilerMarkerTypes.h create mode 100644 tools/profiler/public/ProfilerMarkers.h create mode 100644 tools/profiler/public/ProfilerMarkersDetail.h create mode 100644 tools/profiler/public/ProfilerMarkersPrerequisites.h create mode 100644 tools/profiler/public/ProfilerParent.h create mode 100644 tools/profiler/public/ProfilerRunnable.h create mode 100644 tools/profiler/public/ProfilerRustBindings.h create mode 100644 tools/profiler/public/ProfilerState.h create mode 100644 tools/profiler/public/ProfilerThreadPlatformData.h create mode 100644 tools/profiler/public/ProfilerThreadRegistration.h create mode 100644 tools/profiler/public/ProfilerThreadRegistrationData.h create mode 100644 tools/profiler/public/ProfilerThreadRegistrationInfo.h create mode 100644 tools/profiler/public/ProfilerThreadRegistry.h 
create mode 100644 tools/profiler/public/ProfilerThreadSleep.h create mode 100644 tools/profiler/public/ProfilerThreadState.h create mode 100644 tools/profiler/public/ProfilerUtils.h create mode 100644 tools/profiler/public/shared-libraries.h create mode 100644 tools/profiler/rust-api/Cargo.toml create mode 100644 tools/profiler/rust-api/README.md create mode 100644 tools/profiler/rust-api/build.rs create mode 100644 tools/profiler/rust-api/cbindgen.toml create mode 100644 tools/profiler/rust-api/extra-bindgen-flags.in create mode 100644 tools/profiler/rust-api/macros/Cargo.toml create mode 100644 tools/profiler/rust-api/macros/src/lib.rs create mode 100644 tools/profiler/rust-api/src/gecko_bindings/glue.rs create mode 100644 tools/profiler/rust-api/src/gecko_bindings/mod.rs create mode 100644 tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs create mode 100644 tools/profiler/rust-api/src/json_writer.rs create mode 100644 tools/profiler/rust-api/src/label.rs create mode 100644 tools/profiler/rust-api/src/lib.rs create mode 100644 tools/profiler/rust-api/src/marker/deserializer_tags_state.rs create mode 100644 tools/profiler/rust-api/src/marker/mod.rs create mode 100644 tools/profiler/rust-api/src/marker/options.rs create mode 100644 tools/profiler/rust-api/src/marker/schema.rs create mode 100644 tools/profiler/rust-api/src/profiler_state.rs create mode 100644 tools/profiler/rust-api/src/thread.rs create mode 100644 tools/profiler/rust-api/src/time.rs create mode 100644 tools/profiler/rust-helper/Cargo.toml create mode 100644 tools/profiler/rust-helper/src/compact_symbol_table.rs create mode 100644 tools/profiler/rust-helper/src/elf.rs create mode 100644 tools/profiler/rust-helper/src/lib.rs create mode 100644 tools/profiler/tests/browser/browser.ini create mode 100644 tools/profiler/tests/browser/browser_test_feature_ipcmessages.js create mode 100644 tools/profiler/tests/browser/browser_test_feature_jsallocations.js create mode 100644 
tools/profiler/tests/browser/browser_test_feature_nostacksampling.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_cancel.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_redirect.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_simple.js create mode 100644 tools/profiler/tests/browser/browser_test_marker_network_sts.js create mode 100644 tools/profiler/tests/browser/browser_test_markers_gc_cc.js create mode 100644 tools/profiler/tests/browser/browser_test_markers_parent_process.js create mode 100644 tools/profiler/tests/browser/browser_test_markers_preferencereads.js create mode 100644 tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js create mode 100644 tools/profiler/tests/browser/browser_test_profile_fission.js create mode 100644 tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js create mode 100644 tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js create mode 100644 tools/profiler/tests/browser/browser_test_profile_slow_capture.js create mode 100644 tools/profiler/tests/browser/do_work_500ms.html create mode 100644 tools/profiler/tests/browser/firefox-logo-nightly.svg create mode 100644 tools/profiler/tests/browser/head.js create mode 100644 tools/profiler/tests/browser/multi_frame.html create mode 100644 tools/profiler/tests/browser/page_with_resources.html create 
mode 100644 tools/profiler/tests/browser/redirect.sjs create mode 100644 tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_page.html create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_register.html create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html create mode 100644 tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js create mode 100644 tools/profiler/tests/browser/simple.html create mode 100644 tools/profiler/tests/browser/single_frame.html create mode 100644 tools/profiler/tests/chrome/chrome.ini create mode 100644 tools/profiler/tests/chrome/profiler_test_utils.js create mode 100644 tools/profiler/tests/chrome/test_profile_worker.html create mode 100644 tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html create mode 100644 tools/profiler/tests/gtest/GeckoProfiler.cpp create mode 100644 tools/profiler/tests/gtest/LulTest.cpp create mode 100644 tools/profiler/tests/gtest/LulTestDwarf.cpp create mode 100644 tools/profiler/tests/gtest/LulTestInfrastructure.cpp create mode 100644 tools/profiler/tests/gtest/LulTestInfrastructure.h create mode 100644 tools/profiler/tests/gtest/ThreadProfileTest.cpp create mode 100644 tools/profiler/tests/gtest/moz.build create mode 100644 tools/profiler/tests/shared-head.js create mode 100644 tools/profiler/tests/xpcshell/head.js create mode 100644 tools/profiler/tests/xpcshell/test_active_configuration.js create mode 100644 
tools/profiler/tests/xpcshell/test_addProfilerMarker.js create mode 100644 tools/profiler/tests/xpcshell/test_asm.js create mode 100644 tools/profiler/tests/xpcshell/test_assertion_helper.js create mode 100644 tools/profiler/tests/xpcshell/test_enterjit_osr.js create mode 100644 tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js create mode 100644 tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js create mode 100644 tools/profiler/tests/xpcshell/test_feature_fileioall.js create mode 100644 tools/profiler/tests/xpcshell/test_feature_java.js create mode 100644 tools/profiler/tests/xpcshell/test_feature_js.js create mode 100644 tools/profiler/tests/xpcshell/test_feature_mainthreadio.js create mode 100644 tools/profiler/tests/xpcshell/test_feature_nativeallocations.js create mode 100644 tools/profiler/tests/xpcshell/test_feature_stackwalking.js create mode 100644 tools/profiler/tests/xpcshell/test_get_features.js create mode 100644 tools/profiler/tests/xpcshell/test_merged_stacks.js create mode 100644 tools/profiler/tests/xpcshell/test_pause.js create mode 100644 tools/profiler/tests/xpcshell/test_responsiveness.js create mode 100644 tools/profiler/tests/xpcshell/test_run.js create mode 100644 tools/profiler/tests/xpcshell/test_shared_library.js create mode 100644 tools/profiler/tests/xpcshell/test_start.js create mode 100644 tools/profiler/tests/xpcshell/xpcshell.ini (limited to 'tools/profiler') diff --git a/tools/profiler/core/EHABIStackWalk.cpp b/tools/profiler/core/EHABIStackWalk.cpp new file mode 100644 index 0000000000..e3099b89ec --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.cpp @@ -0,0 +1,597 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI, as described in: + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf + * + * This handles only the ARM-defined "personality routines" (chapter + * 9), and doesn't track the value of FP registers, because profiling + * needs only the chain of PC/SP values. + * + * Because the exception handling info may not be accurate for all + * possible places where an async signal could occur (e.g., in a + * prologue or epilogue), this bounds-checks all stack accesses. + * + * This file uses "struct" for structures in the exception tables and + * "class" otherwise. We should avoid violating the C++11 + * standard-layout rules in the former. + */ + +#include "EHABIStackWalk.h" + +#include "shared-libraries.h" +#include "platform.h" + +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/EndianUtils.h" + +#include +#include +#include +#include +#include + +#ifndef PT_ARM_EXIDX +# define PT_ARM_EXIDX 0x70000001 +#endif + +namespace mozilla { + +struct PRel31 { + uint32_t mBits; + bool topBit() const { return mBits & 0x80000000; } + uint32_t value() const { return mBits & 0x7fffffff; } + int32_t offset() const { return (static_cast(mBits) << 1) >> 1; } + const void* compute() const { + return reinterpret_cast(this) + offset(); + } + + private: + PRel31(const PRel31& copied) = delete; + PRel31() = delete; +}; + +struct EHEntry { + PRel31 startPC; + PRel31 exidx; + + private: + EHEntry(const EHEntry& copied) = delete; + EHEntry() = delete; +}; + +class EHState { + // Note that any core register can be used as a "frame pointer" to + // influence the unwinding process, so this must track all of them. 
+ uint32_t mRegs[16]; + + public: + bool unwind(const EHEntry* aEntry, const void* stackBase); + uint32_t& operator[](int i) { return mRegs[i]; } + const uint32_t& operator[](int i) const { return mRegs[i]; } + explicit EHState(const mcontext_t&); +}; + +enum { R_SP = 13, R_LR = 14, R_PC = 15 }; + +class EHTable { + uint32_t mStartPC; + uint32_t mEndPC; + uint32_t mBaseAddress; + const EHEntry* mEntriesBegin; + const EHEntry* mEntriesEnd; + std::string mName; + + public: + EHTable(const void* aELF, size_t aSize, const std::string& aName); + const EHEntry* lookup(uint32_t aPC) const; + bool isValid() const { return mEntriesEnd != mEntriesBegin; } + const std::string& name() const { return mName; } + uint32_t startPC() const { return mStartPC; } + uint32_t endPC() const { return mEndPC; } + uint32_t baseAddress() const { return mBaseAddress; } +}; + +class EHAddrSpace { + std::vector mStarts; + std::vector mTables; + static mozilla::Atomic sCurrent; + + public: + explicit EHAddrSpace(const std::vector& aTables); + const EHTable* lookup(uint32_t aPC) const; + static void Update(); + static const EHAddrSpace* Get(); +}; + +void EHABIStackWalkInit() { EHAddrSpace::Update(); } + +size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs, + void** aPCs, const size_t aNumFrames) { + const EHAddrSpace* space = EHAddrSpace::Get(); + EHState state(aContext); + size_t count = 0; + + while (count < aNumFrames) { + uint32_t pc = state[R_PC], sp = state[R_SP]; + + // ARM instructions are always aligned to 2 or 4 bytes. + // The last bit of the pc / lr indicates ARM or Thumb mode. + // We're only interested in the instruction address, so we mask off that + // bit. + constexpr uint32_t instrAddrMask = ~1; + uint32_t instrAddress = pc & instrAddrMask; + + aPCs[count] = reinterpret_cast(instrAddress); + aSPs[count] = reinterpret_cast(sp); + count++; + + if (!space) break; + // TODO: cache these lookups. 
Binary-searching libxul is + // expensive (possibly more expensive than doing the actual + // unwind), and even a small cache should help. + const EHTable* table = space->lookup(pc); + if (!table) break; + const EHEntry* entry = table->lookup(pc); + if (!entry) break; + if (!state.unwind(entry, stackBase)) break; + } + + return count; +} + +class EHInterp { + public: + // Note that stackLimit is exclusive and stackBase is inclusive + // (i.e., stackLimit < SP <= stackBase), following the convention + // set by the AAPCS spec. + EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit, + uint32_t aStackBase) + : mState(aState), + mStackLimit(aStackLimit), + mStackBase(aStackBase), + mNextWord(0), + mWordsLeft(0), + mFailed(false) { + const PRel31& exidx = aEntry->exidx; + uint32_t firstWord; + + if (exidx.mBits == 1) { // EXIDX_CANTUNWIND + mFailed = true; + return; + } + if (exidx.topBit()) { + firstWord = exidx.mBits; + } else { + mNextWord = reinterpret_cast(exidx.compute()); + firstWord = *mNextWord++; + } + + switch (firstWord >> 24) { + case 0x80: // short + mWord = firstWord << 8; + mBytesLeft = 3; + break; + case 0x81: + case 0x82: // long; catch descriptor size ignored + mWord = firstWord << 16; + mBytesLeft = 2; + mWordsLeft = (firstWord >> 16) & 0xff; + break; + default: + // unknown personality + mFailed = true; + } + } + + bool unwind(); + + private: + // TODO: GCC has been observed not CSEing repeated reads of + // mState[R_SP] with writes to mFailed between them, suggesting that + // it hasn't determined that they can't alias and is thus missing + // optimization opportunities. So, we may want to flatten EHState + // into this class; this may also make the code simpler. 
+ EHState& mState; + uint32_t mStackLimit; + uint32_t mStackBase; + const uint32_t* mNextWord; + uint32_t mWord; + uint8_t mWordsLeft; + uint8_t mBytesLeft; + bool mFailed; + + enum { + I_ADDSP = 0x00, // 0sxxxxxx (subtract if s) + M_ADDSP = 0x80, + I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set) + M_POPMASK = 0xf0, + I_MOVSP = 0x90, // 1001nnnn + M_MOVSP = 0xf0, + I_POPN = 0xa0, // 1010lnnn + M_POPN = 0xf0, + I_FINISH = 0xb0, // 10110000 + I_POPLO = 0xb1, // 10110001 0000iiii (if any i set) + I_ADDSPBIG = 0xb2, // 10110010 uleb128 + I_POPFDX = 0xb3, // 10110011 sssscccc + I_POPFDX8 = 0xb8, // 10111nnn + M_POPFDX8 = 0xf8, + // "Intel Wireless MMX" extensions omitted. + I_POPFDD = 0xc8, // 1100100h sssscccc + M_POPFDD = 0xfe, + I_POPFDD8 = 0xd0, // 11010nnn + M_POPFDD8 = 0xf8 + }; + + uint8_t next() { + if (mBytesLeft == 0) { + if (mWordsLeft == 0) { + return I_FINISH; + } + mWordsLeft--; + mWord = *mNextWord++; + mBytesLeft = 4; + } + mBytesLeft--; + mWord = (mWord << 8) | (mWord >> 24); // rotate + return mWord; + } + + uint32_t& vSP() { return mState[R_SP]; } + uint32_t* ptrSP() { return reinterpret_cast(vSP()); } + + void checkStackBase() { + if (vSP() > mStackBase) mFailed = true; + } + void checkStackLimit() { + if (vSP() <= mStackLimit) mFailed = true; + } + void checkStackAlign() { + if ((vSP() & 3) != 0) mFailed = true; + } + void checkStack() { + checkStackBase(); + checkStackLimit(); + checkStackAlign(); + } + + void popRange(uint8_t first, uint8_t last, uint16_t mask) { + bool hasSP = false; + uint32_t tmpSP; + if (mask == 0) mFailed = true; + for (uint8_t r = first; r <= last; ++r) { + if (mask & 1) { + if (r == R_SP) { + hasSP = true; + tmpSP = *ptrSP(); + } else + mState[r] = *ptrSP(); + vSP() += 4; + checkStackBase(); + if (mFailed) return; + } + mask >>= 1; + } + if (hasSP) { + vSP() = tmpSP; + checkStack(); + } + } +}; + +bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) { + // The unwinding program cannot set SP to less 
than the initial value. + uint32_t stackLimit = mRegs[R_SP] - 4; + uint32_t stackBase = reinterpret_cast(stackBasePtr); + EHInterp interp(*this, aEntry, stackLimit, stackBase); + return interp.unwind(); +} + +bool EHInterp::unwind() { + mState[R_PC] = 0; + checkStack(); + while (!mFailed) { + uint8_t insn = next(); +#if DEBUG_EHABI_UNWIND + LOG("unwind insn = %02x", (unsigned)insn); +#endif + // Try to put the common cases first. + + // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4 + // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4 + if ((insn & M_ADDSP) == I_ADDSP) { + uint32_t offset = ((insn & 0x3f) << 2) + 4; + if (insn & 0x40) { + vSP() -= offset; + checkStackLimit(); + } else { + vSP() += offset; + checkStackBase(); + } + continue; + } + + // 10100nnn: Pop r4-r[4+nnn] + // 10101nnn: Pop r4-r[4+nnn], r14 + if ((insn & M_POPN) == I_POPN) { + uint8_t n = (insn & 0x07) + 1; + bool lr = insn & 0x08; + uint32_t* ptr = ptrSP(); + vSP() += (n + (lr ? 1 : 0)) * 4; + checkStackBase(); + for (uint8_t r = 4; r < 4 + n; ++r) mState[r] = *ptr++; + if (lr) mState[R_LR] = *ptr++; + continue; + } + + // 1011000: Finish + if (insn == I_FINISH) { + if (mState[R_PC] == 0) { + mState[R_PC] = mState[R_LR]; + // Non-standard change (bug 916106): Prevent the caller from + // re-using LR. Since the caller is by definition not a leaf + // routine, it will have to restore LR from somewhere to + // return to its own caller, so we can safely zero it here. + // This makes a difference only if an error in unwinding + // (e.g., caused by starting from within a prologue/epilogue) + // causes us to load a pointer to a leaf routine as LR; if we + // don't do something, we'll go into an infinite loop of + // "returning" to that same function. 
+ mState[R_LR] = 0; + } + return true; + } + + // 1001nnnn: Set vsp = r[nnnn] + if ((insn & M_MOVSP) == I_MOVSP) { + vSP() = mState[insn & 0x0f]; + checkStack(); + continue; + } + + // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD) + // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD) + if ((insn & M_POPFDD) == I_POPFDD) { + uint8_t n = (next() & 0x0f) + 1; + // Note: if the 16+ssss+cccc > 31, the encoding is reserved. + // As the space is currently unused, we don't try to check. + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD) + if ((insn & M_POPFDD8) == I_POPFDD8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2) + if (insn == I_ADDSPBIG) { + uint32_t acc = 0; + uint8_t shift = 0; + uint8_t byte; + do { + if (shift >= 32) return false; + byte = next(); + acc |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + uint32_t offset = 0x204 + (acc << 2); + // The calculations above could have overflowed. 
+ // But the one we care about is this: + if (vSP() + offset < vSP()) mFailed = true; + vSP() += offset; + // ...so that this is the only other check needed: + checkStackBase(); + continue; + } + + // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4} + if ((insn & M_POPMASK) == I_POPMASK) { + popRange(4, 15, ((insn & 0x0f) << 8) | next()); + continue; + } + + // 1011001 0000iiii (i not all 0): Pop under mask {r3-r0} + if (insn == I_POPLO) { + popRange(0, 3, next() & 0x0f); + continue; + } + + // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX) + if (insn == I_POPFDX) { + uint8_t n = (next() & 0x0f) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX) + if ((insn & M_POPFDX8) == I_POPFDX8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // unhandled instruction +#ifdef DEBUG_EHABI_UNWIND + LOG("Unhandled EHABI instruction 0x%02x", insn); +#endif + mFailed = true; + } + return false; +} + +bool operator<(const EHTable& lhs, const EHTable& rhs) { + return lhs.startPC() < rhs.startPC(); +} + +// Async signal unsafe. 
+EHAddrSpace::EHAddrSpace(const std::vector& aTables) + : mTables(aTables) { + std::sort(mTables.begin(), mTables.end()); + DebugOnly lastEnd = 0; + for (std::vector::iterator i = mTables.begin(); i != mTables.end(); + ++i) { + MOZ_ASSERT(i->startPC() >= lastEnd); + mStarts.push_back(i->startPC()); + lastEnd = i->endPC(); + } +} + +const EHTable* EHAddrSpace::lookup(uint32_t aPC) const { + ptrdiff_t i = (std::upper_bound(mStarts.begin(), mStarts.end(), aPC) - + mStarts.begin()) - + 1; + + if (i < 0 || aPC >= mTables[i].endPC()) return 0; + return &mTables[i]; +} + +const EHEntry* EHTable::lookup(uint32_t aPC) const { + MOZ_ASSERT(aPC >= mStartPC); + if (aPC >= mEndPC) return nullptr; + + const EHEntry* begin = mEntriesBegin; + const EHEntry* end = mEntriesEnd; + MOZ_ASSERT(begin < end); + if (aPC < reinterpret_cast(begin->startPC.compute())) + return nullptr; + + while (end - begin > 1) { +#ifdef EHABI_UNWIND_MORE_ASSERTS + if ((end - 1)->startPC.compute() < begin->startPC.compute()) { + MOZ_CRASH("unsorted exidx"); + } +#endif + const EHEntry* mid = begin + (end - begin) / 2; + if (aPC < reinterpret_cast(mid->startPC.compute())) + end = mid; + else + begin = mid; + } + return begin; +} + +#if MOZ_LITTLE_ENDIAN() +static const unsigned char hostEndian = ELFDATA2LSB; +#elif MOZ_BIG_ENDIAN() +static const unsigned char hostEndian = ELFDATA2MSB; +#else +# error "No endian?" +#endif + +// Async signal unsafe: std::vector::reserve, std::string copy ctor. 
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName) + : mStartPC(~0), // largest uint32_t + mEndPC(0), + mEntriesBegin(nullptr), + mEntriesEnd(nullptr), + mName(aName) { + const uint32_t fileHeaderAddr = reinterpret_cast(aELF); + + if (aSize < sizeof(Elf32_Ehdr)) return; + + const Elf32_Ehdr& file = *(reinterpret_cast(fileHeaderAddr)); + if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 || + file.e_ident[EI_CLASS] != ELFCLASS32 || + file.e_ident[EI_DATA] != hostEndian || + file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM || + file.e_version != EV_CURRENT) + // e_flags? + return; + + MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize); + const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0; + for (unsigned i = 0; i < file.e_phnum; ++i) { + const Elf32_Phdr& phdr = *(reinterpret_cast( + fileHeaderAddr + file.e_phoff + i * file.e_phentsize)); + if (phdr.p_type == PT_ARM_EXIDX) { + exidxHdr = &phdr; + } else if (phdr.p_type == PT_LOAD) { + if (phdr.p_offset == 0) { + zeroHdr = &phdr; + } + if (phdr.p_flags & PF_X) { + mStartPC = std::min(mStartPC, phdr.p_vaddr); + mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz); + } + } + } + if (!exidxHdr) return; + if (!zeroHdr) return; + mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr; + mStartPC += mBaseAddress; + mEndPC += mBaseAddress; + mEntriesBegin = + reinterpret_cast(mBaseAddress + exidxHdr->p_vaddr); + mEntriesEnd = reinterpret_cast( + mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz); +} + +mozilla::Atomic EHAddrSpace::sCurrent(nullptr); + +// Async signal safe; can fail if Update() hasn't returned yet. +const EHAddrSpace* EHAddrSpace::Get() { return sCurrent; } + +// Collect unwinding information from loaded objects. Calls after the +// first have no effect. Async signal unsafe. 
+void EHAddrSpace::Update() { + const EHAddrSpace* space = sCurrent; + if (space) return; + + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + std::vector tables; + + for (size_t i = 0; i < info.GetSize(); ++i) { + const SharedLibrary& lib = info.GetEntry(i); + // FIXME: This isn't correct if the start address isn't p_offset 0, because + // the start address will not point at the file header. But this is worked + // around by magic number checks in the EHTable constructor. + EHTable tab(reinterpret_cast(lib.GetStart()), + lib.GetEnd() - lib.GetStart(), lib.GetNativeDebugPath()); + if (tab.isValid()) tables.push_back(tab); + } + space = new EHAddrSpace(tables); + + if (!sCurrent.compareExchange(nullptr, space)) { + delete space; + space = sCurrent; + } +} + +EHState::EHState(const mcontext_t& context) { +#ifdef linux + mRegs[0] = context.arm_r0; + mRegs[1] = context.arm_r1; + mRegs[2] = context.arm_r2; + mRegs[3] = context.arm_r3; + mRegs[4] = context.arm_r4; + mRegs[5] = context.arm_r5; + mRegs[6] = context.arm_r6; + mRegs[7] = context.arm_r7; + mRegs[8] = context.arm_r8; + mRegs[9] = context.arm_r9; + mRegs[10] = context.arm_r10; + mRegs[11] = context.arm_fp; + mRegs[12] = context.arm_ip; + mRegs[13] = context.arm_sp; + mRegs[14] = context.arm_lr; + mRegs[15] = context.arm_pc; +#else +# error "Unhandled OS for ARM EHABI unwinding" +#endif +} + +} // namespace mozilla diff --git a/tools/profiler/core/EHABIStackWalk.h b/tools/profiler/core/EHABIStackWalk.h new file mode 100644 index 0000000000..61286290b8 --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.h @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI; see the comment at the top of + * the .cpp file for details. + */ + +#ifndef mozilla_EHABIStackWalk_h__ +#define mozilla_EHABIStackWalk_h__ + +#include +#include + +namespace mozilla { + +void EHABIStackWalkInit(); + +size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs, + void** aPCs, size_t aNumFrames); + +} // namespace mozilla + +#endif diff --git a/tools/profiler/core/MicroGeckoProfiler.cpp b/tools/profiler/core/MicroGeckoProfiler.cpp new file mode 100644 index 0000000000..bedb755742 --- /dev/null +++ b/tools/profiler/core/MicroGeckoProfiler.cpp @@ -0,0 +1,203 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "GeckoProfiler.h" + +#include "mozilla/Maybe.h" +#include "nsPrintfCString.h" +#include "public/GeckoTraceEvent.h" + +using namespace mozilla; +using webrtc::trace_event_internal::TraceValueUnion; + +void uprofiler_register_thread(const char* name, void* stacktop) { +#ifdef MOZ_GECKO_PROFILER + profiler_register_thread(name, stacktop); +#endif // MOZ_GECKO_PROFILER +} + +void uprofiler_unregister_thread() { +#ifdef MOZ_GECKO_PROFILER + profiler_unregister_thread(); +#endif // MOZ_GECKO_PROFILER +} + +#ifdef MOZ_GECKO_PROFILER +namespace { +Maybe ToTiming(char phase) { + switch (phase) { + case 'B': + return Some(MarkerTiming::IntervalStart()); + case 'E': + return Some(MarkerTiming::IntervalEnd()); + case 'I': + return Some(MarkerTiming::InstantNow()); + default: + return Nothing(); + } +} + +struct TraceOption { + bool mPassed = false; + ProfilerString8View mName; + Variant mValue = AsVariant(false); +}; + +struct TraceMarker { + static constexpr int MAX_NUM_ARGS = 2; + using OptionsType = std::tuple; + static constexpr mozilla::Span 
MarkerTypeName() { + return MakeStringSpan("TraceEvent"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, + const OptionsType& aArgs) { + auto writeValue = [&](const auto& aName, const auto& aVariant) { + aVariant.match( + [&](const int64_t& aValue) { aWriter.IntProperty(aName, aValue); }, + [&](const bool& aValue) { aWriter.BoolProperty(aName, aValue); }, + [&](const double& aValue) { aWriter.DoubleProperty(aName, aValue); }, + [&](const ProfilerString8View& aValue) { + aWriter.StringProperty(aName, aValue); + }); + }; + if (const auto& arg = std::get<0>(aArgs); arg.mPassed) { + aWriter.StringProperty("name1", arg.mName); + writeValue("val1", arg.mValue); + } + if (const auto& arg = std::get<1>(aArgs); arg.mPassed) { + aWriter.StringProperty("name2", arg.mName); + writeValue("val2", arg.mValue); + } + } + static mozilla::MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.SetChartLabel("{marker.name}"); + schema.SetTableLabel( + "{marker.name} {marker.data.name1} {marker.data.val1} " + "{marker.data.name2} {marker.data.val2}"); + schema.AddKeyLabelFormatSearchable("name1", "Key 1", MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyLabelFormatSearchable("val1", "Value 1", MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyLabelFormatSearchable("name2", "Key 2", MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyLabelFormatSearchable("val2", "Value 2", MS::Format::String, + MS::Searchable::Searchable); + return schema; + } +}; +} // namespace + +namespace mozilla { +template <> +struct ProfileBufferEntryWriter::Serializer { + static Length Bytes(const TraceOption& aOption) { + // 1 byte to store passed flag, then object size if passed. + return aOption.mPassed ? 
(1 + SumBytes(aOption.mName, aOption.mValue)) : 1; + } + + static void Write(ProfileBufferEntryWriter& aEW, const TraceOption& aOption) { + // 'T'/'t' is just an arbitrary 1-byte value to distinguish states. + if (aOption.mPassed) { + aEW.WriteObject('T'); + // Use the Serializer for the name/value pair. + aEW.WriteObject(aOption.mName); + aEW.WriteObject(aOption.mValue); + } else { + aEW.WriteObject('t'); + } + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer { + static void ReadInto(ProfileBufferEntryReader& aER, TraceOption& aOption) { + char c = aER.ReadObject(); + if ((aOption.mPassed = (c == 'T'))) { + aER.ReadIntoObject(aOption.mName); + aER.ReadIntoObject(aOption.mValue); + } else { + MOZ_ASSERT(c == 't'); + } + } + + static TraceOption Read(ProfileBufferEntryReader& aER) { + TraceOption option; + ReadInto(aER, option); + return option; + } +}; +} // namespace mozilla +#endif // MOZ_GECKO_PROFILER + +void uprofiler_simple_event_marker(const char* name, char phase, int num_args, + const char** arg_names, + const unsigned char* arg_types, + const unsigned long long* arg_values) { +#ifdef MOZ_GECKO_PROFILER + if (!profiler_thread_is_being_profiled_for_markers()) { + return; + } + Maybe timing = ToTiming(phase); + if (!timing) { + if (getenv("MOZ_LOG_UNKNOWN_TRACE_EVENT_PHASES")) { + fprintf(stderr, "XXX UProfiler: phase not handled: '%c'\n", phase); + } + return; + } + MOZ_ASSERT(num_args <= TraceMarker::MAX_NUM_ARGS); + TraceMarker::OptionsType tuple; + TraceOption* args[2] = {&std::get<0>(tuple), &std::get<1>(tuple)}; + for (int i = 0; i < std::min(num_args, TraceMarker::MAX_NUM_ARGS); ++i) { + auto& arg = *args[i]; + arg.mPassed = true; + arg.mName = ProfilerString8View::WrapNullTerminatedString(arg_names[i]); + switch (arg_types[i]) { + case TRACE_VALUE_TYPE_UINT: + MOZ_ASSERT(arg_values[i] <= std::numeric_limits::max()); + arg.mValue = AsVariant(static_cast( + reinterpret_cast(&arg_values[i])->as_uint)); + break; + case 
TRACE_VALUE_TYPE_INT: + arg.mValue = AsVariant(static_cast( + reinterpret_cast(&arg_values[i])->as_int)); + break; + case TRACE_VALUE_TYPE_BOOL: + arg.mValue = AsVariant( + reinterpret_cast(&arg_values[i])->as_bool); + break; + case TRACE_VALUE_TYPE_DOUBLE: + arg.mValue = + AsVariant(reinterpret_cast(&arg_values[i]) + ->as_double); + break; + case TRACE_VALUE_TYPE_POINTER: + arg.mValue = AsVariant(ProfilerString8View(nsPrintfCString( + "%p", reinterpret_cast(&arg_values[i]) + ->as_pointer))); + break; + case TRACE_VALUE_TYPE_STRING: + arg.mValue = AsVariant(ProfilerString8View::WrapNullTerminatedString( + reinterpret_cast(&arg_values[i]) + ->as_string)); + break; + case TRACE_VALUE_TYPE_COPY_STRING: + arg.mValue = AsVariant(ProfilerString8View( + nsCString(reinterpret_cast(&arg_values[i]) + ->as_string))); + break; + default: + MOZ_ASSERT_UNREACHABLE("Unexpected trace value type"); + arg.mValue = AsVariant(ProfilerString8View( + nsPrintfCString("Unexpected type: %u", arg_types[i]))); + break; + } + } + profiler_add_marker(ProfilerString8View::WrapNullTerminatedString(name), + geckoprofiler::category::MEDIA_RT, {timing.extract()}, + TraceMarker{}, tuple); +#endif // MOZ_GECKO_PROFILER +} diff --git a/tools/profiler/core/PageInformation.cpp b/tools/profiler/core/PageInformation.cpp new file mode 100644 index 0000000000..83d2d508a1 --- /dev/null +++ b/tools/profiler/core/PageInformation.cpp @@ -0,0 +1,44 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "PageInformation.h" + +#include "mozilla/ProfileJSONWriter.h" + +PageInformation::PageInformation(uint64_t aTabID, uint64_t aInnerWindowID, + const nsCString& aUrl, + uint64_t aEmbedderInnerWindowID, + bool aIsPrivateBrowsing) + : mTabID(aTabID), + mInnerWindowID(aInnerWindowID), + mUrl(aUrl), + mEmbedderInnerWindowID(aEmbedderInnerWindowID), + mIsPrivateBrowsing(aIsPrivateBrowsing) {} + +bool PageInformation::Equals(PageInformation* aOtherPageInfo) const { + // It's enough to check inner window IDs because they are unique for each + // page. Therefore, we don't have to check the tab ID or url. + return InnerWindowID() == aOtherPageInfo->InnerWindowID(); +} + +void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const { + // Here, we are converting uint64_t to double. Both tab and Inner + // Window IDs are created using `nsContentUtils::GenerateProcessSpecificId`, + // which is specifically designed to only use 53 of the 64 bits to be lossless + // when passed into and out of JS as a double. + aWriter.StartObjectElement(); + aWriter.DoubleProperty("tabID", TabID()); + aWriter.DoubleProperty("innerWindowID", InnerWindowID()); + aWriter.StringProperty("url", Url()); + aWriter.DoubleProperty("embedderInnerWindowID", EmbedderInnerWindowID()); + aWriter.BoolProperty("isPrivateBrowsing", IsPrivateBrowsing()); + aWriter.EndObject(); +} + +size_t PageInformation::SizeOfIncludingThis( + mozilla::MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this); +} diff --git a/tools/profiler/core/PageInformation.h b/tools/profiler/core/PageInformation.h new file mode 100644 index 0000000000..6c9039b9a4 --- /dev/null +++ b/tools/profiler/core/PageInformation.h @@ -0,0 +1,68 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PageInformation_h +#define PageInformation_h + +#include "mozilla/Maybe.h" +#include "mozilla/MemoryReporting.h" +#include "nsISupportsImpl.h" +#include "nsString.h" + +namespace mozilla { +namespace baseprofiler { +class SpliceableJSONWriter; +} // namespace baseprofiler +} // namespace mozilla + +// This class contains information that's relevant to a single page only +// while the page information is important and registered with the profiler, +// but regardless of whether the profiler is running. All accesses to it are +// protected by the profiler state lock. +// When the page gets unregistered, we keep the profiler buffer position +// to determine if we are still using this page. If not, we unregister +// it in the next page registration. +class PageInformation final { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PageInformation) + PageInformation(uint64_t aTabID, uint64_t aInnerWindowID, + const nsCString& aUrl, uint64_t aEmbedderInnerWindowID, + bool aIsPrivateBrowsing); + + size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + bool Equals(PageInformation* aOtherPageInfo) const; + void StreamJSON(mozilla::baseprofiler::SpliceableJSONWriter& aWriter) const; + + uint64_t InnerWindowID() const { return mInnerWindowID; } + uint64_t TabID() const { return mTabID; } + const nsCString& Url() const { return mUrl; } + uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; } + bool IsPrivateBrowsing() const { return mIsPrivateBrowsing; } + + mozilla::Maybe BufferPositionWhenUnregistered() const { + return mBufferPositionWhenUnregistered; + } + + void NotifyUnregistered(uint64_t aBufferPosition) { + mBufferPositionWhenUnregistered = mozilla::Some(aBufferPosition); + } + + private: + const uint64_t mTabID; + const uint64_t mInnerWindowID; + const nsCString mUrl; + const uint64_t mEmbedderInnerWindowID; + 
const bool mIsPrivateBrowsing; + + // Holds the buffer position when page is unregistered. + // It's used to determine if we still use this page in the profiler or + // not. + mozilla::Maybe mBufferPositionWhenUnregistered; + + virtual ~PageInformation() = default; +}; + +#endif // PageInformation_h diff --git a/tools/profiler/core/PlatformMacros.h b/tools/profiler/core/PlatformMacros.h new file mode 100644 index 0000000000..c72e94c128 --- /dev/null +++ b/tools/profiler/core/PlatformMacros.h @@ -0,0 +1,130 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PLATFORM_MACROS_H +#define PLATFORM_MACROS_H + +// Define platform selection macros in a consistent way. Don't add anything +// else to this file, so it can remain freestanding. The primary factorisation +// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined +// too, since they are sometimes convenient. +// +// Note: "GP" is short for "Gecko Profiler". + +#undef GP_PLAT_x86_android +#undef GP_PLAT_amd64_android +#undef GP_PLAT_arm_android +#undef GP_PLAT_arm64_android +#undef GP_PLAT_x86_linux +#undef GP_PLAT_amd64_linux +#undef GP_PLAT_arm_linux +#undef GP_PLAT_mips64_linux +#undef GP_PLAT_amd64_darwin +#undef GP_PLAT_arm64_darwin +#undef GP_PLAT_x86_windows +#undef GP_PLAT_amd64_windows +#undef GP_PLAT_arm64_windows + +#undef GP_ARCH_x86 +#undef GP_ARCH_amd64 +#undef GP_ARCH_arm +#undef GP_ARCH_arm64 +#undef GP_ARCH_mips64 + +#undef GP_OS_android +#undef GP_OS_linux +#undef GP_OS_darwin +#undef GP_OS_windows + +// We test __ANDROID__ before __linux__ because __linux__ is defined on both +// Android and Linux, whereas GP_OS_android is not defined on vanilla Linux. 
+ +#if defined(__ANDROID__) && defined(__i386__) +# define GP_PLAT_x86_android 1 +# define GP_ARCH_x86 1 +# define GP_OS_android 1 + +#elif defined(__ANDROID__) && defined(__x86_64__) +# define GP_PLAT_amd64_android 1 +# define GP_ARCH_amd64 1 +# define GP_OS_android 1 + +#elif defined(__ANDROID__) && defined(__arm__) +# define GP_PLAT_arm_android 1 +# define GP_ARCH_arm 1 +# define GP_OS_android 1 + +#elif defined(__ANDROID__) && defined(__aarch64__) +# define GP_PLAT_arm64_android 1 +# define GP_ARCH_arm64 1 +# define GP_OS_android 1 + +#elif defined(__linux__) && defined(__i386__) +# define GP_PLAT_x86_linux 1 +# define GP_ARCH_x86 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__x86_64__) +# define GP_PLAT_amd64_linux 1 +# define GP_ARCH_amd64 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__arm__) +# define GP_PLAT_arm_linux 1 +# define GP_ARCH_arm 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__aarch64__) +# define GP_PLAT_arm64_linux 1 +# define GP_ARCH_arm64 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__mips64) +# define GP_PLAT_mips64_linux 1 +# define GP_ARCH_mips64 1 +# define GP_OS_linux 1 + +#elif defined(__APPLE__) && defined(__aarch64__) +# define GP_PLAT_arm64_darwin 1 +# define GP_ARCH_arm64 1 +# define GP_OS_darwin 1 + +#elif defined(__APPLE__) && defined(__x86_64__) +# define GP_PLAT_amd64_darwin 1 +# define GP_ARCH_amd64 1 +# define GP_OS_darwin 1 + +#elif defined(__FreeBSD__) && defined(__x86_64__) +# define GP_PLAT_amd64_freebsd 1 +# define GP_ARCH_amd64 1 +# define GP_OS_freebsd 1 + +#elif defined(__FreeBSD__) && defined(__aarch64__) +# define GP_PLAT_arm64_freebsd 1 +# define GP_ARCH_arm64 1 +# define GP_OS_freebsd 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \ + (defined(_M_IX86) || defined(__i386__)) +# define GP_PLAT_x86_windows 1 +# define GP_ARCH_x86 1 +# define GP_OS_windows 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \ + 
(defined(_M_X64) || defined(__x86_64__)) +# define GP_PLAT_amd64_windows 1 +# define GP_ARCH_amd64 1 +# define GP_OS_windows 1 + +#elif defined(_MSC_VER) && defined(_M_ARM64) +# define GP_PLAT_arm64_windows 1 +# define GP_ARCH_arm64 1 +# define GP_OS_windows 1 + +#else +# error "Unsupported platform" +#endif + +#endif /* ndef PLATFORM_MACROS_H */ diff --git a/tools/profiler/core/PowerCounters-linux.cpp b/tools/profiler/core/PowerCounters-linux.cpp new file mode 100644 index 0000000000..006cea4867 --- /dev/null +++ b/tools/profiler/core/PowerCounters-linux.cpp @@ -0,0 +1,287 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "PowerCounters.h" +#include "nsXULAppAPI.h" +#include "mozilla/Maybe.h" +#include "mozilla/Logging.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// From the kernel rapl_scale() function: +// +// > users must then scale back: count * 1/(1e9*2^32) to get Joules +#define PERF_EVENT_SCALE_NANOJOULES 2.3283064365386962890625e-1 +#define SCALE_NANOJOULES_TO_PICOWATTHOUR 3.6 +#define SYSFS_PERF_POWER_TYPE_PATH "/sys/bus/event_source/devices/power/type" + +static mozilla::LazyLogModule sRaplEventLog("profiler.rapl"); +#define RAPL_LOG(...) 
\ + MOZ_LOG(sRaplEventLog, mozilla::LogLevel::Debug, (__VA_ARGS__)); + +enum class RaplEventType : uint64_t { + RAPL_ENERGY_CORES = 0x01, + RAPL_ENERGY_PKG = 0x02, + RAPL_ENERGY_DRAM = 0x03, + RAPL_ENERGY_GPU = 0x04, + RAPL_ENERGY_PSYS = 0x05, +}; + +struct RaplDomain { + RaplEventType mRaplEventType; + const char* mLabel; + const char* mDescription; +}; + +constexpr RaplDomain kSupportedRaplDomains[] = { + {RaplEventType::RAPL_ENERGY_CORES, "Power: CPU cores", + "Consumption of all physical cores"}, + { + RaplEventType::RAPL_ENERGY_PKG, + "Power: CPU package", + "Consumption of the whole processor package", + }, + { + RaplEventType::RAPL_ENERGY_DRAM, + "Power: DRAM", + "Consumption of the dram domain", + }, + { + RaplEventType::RAPL_ENERGY_GPU, + "Power: iGPU", + "Consumption of the builtin-gpu domain", + }, + { + RaplEventType::RAPL_ENERGY_PSYS, + "Power: System", + "Consumption of the builtin-psys domain", + }}; + +static std::string GetSysfsFileID(RaplEventType aEventType) { + switch (aEventType) { + case RaplEventType::RAPL_ENERGY_CORES: + return "cores"; + case RaplEventType::RAPL_ENERGY_PKG: + return "pkg"; + case RaplEventType::RAPL_ENERGY_DRAM: + return "ram"; + case RaplEventType::RAPL_ENERGY_GPU: + return "gpu"; + case RaplEventType::RAPL_ENERGY_PSYS: + return "psys"; + } + + return ""; +} + +static double GetRaplPerfEventScale(RaplEventType aEventType) { + const std::string sysfsFileName = + "/sys/bus/event_source/devices/power/events/energy-" + + GetSysfsFileID(aEventType) + ".scale"; + std::ifstream sysfsFile(sysfsFileName); + + if (!sysfsFile) { + return PERF_EVENT_SCALE_NANOJOULES; + } + + double scale; + + if (sysfsFile >> scale) { + RAPL_LOG("Read scale from %s: %.22e", sysfsFileName.c_str(), scale); + return scale * 1e9; + } + + return PERF_EVENT_SCALE_NANOJOULES; +} + +static uint64_t GetRaplPerfEventConfig(RaplEventType aEventType) { + const std::string sysfsFileName = + "/sys/bus/event_source/devices/power/events/energy-" + + 
GetSysfsFileID(aEventType); + std::ifstream sysfsFile(sysfsFileName); + + if (!sysfsFile) { + return static_cast(aEventType); + } + + char buffer[7] = {}; + const std::string key = "event="; + + if (!sysfsFile.get(buffer, static_cast(key.length()) + 1) || + key != buffer) { + return static_cast(aEventType); + } + + uint64_t config; + + if (sysfsFile >> std::hex >> config) { + RAPL_LOG("Read config from %s: 0x%" PRIx64, sysfsFileName.c_str(), config); + return config; + } + + return static_cast(aEventType); +} + +class RaplProfilerCount final : public BaseProfilerCount { + public: + explicit RaplProfilerCount(int aPerfEventType, + const RaplEventType& aPerfEventConfig, + const char* aLabel, const char* aDescription) + : BaseProfilerCount(aLabel, nullptr, nullptr, "power", aDescription), + mLastResult(0), + mPerfEventFd(-1) { + RAPL_LOG("Creating RAPL Event for type: %s", mLabel); + + // Optimize for ease of use and do not set an excludes value. This + // ensures we do not require PERF_PMU_CAP_NO_EXCLUDE. 
+ struct perf_event_attr attr = {0}; + memset(&attr, 0, sizeof(attr)); + attr.type = aPerfEventType; + attr.size = sizeof(struct perf_event_attr); + attr.config = GetRaplPerfEventConfig(aPerfEventConfig); + attr.sample_period = 0; + attr.sample_type = PERF_SAMPLE_IDENTIFIER; + attr.inherit = 1; + + RAPL_LOG("Config for event %s: 0x%llx", mLabel, attr.config); + + mEventScale = GetRaplPerfEventScale(aPerfEventConfig); + RAPL_LOG("Scale for event %s: %.22e", mLabel, mEventScale); + + long fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); + if (fd < 0) { + RAPL_LOG("Event descriptor creation failed for event: %s", mLabel); + mPerfEventFd = -1; + return; + } + + RAPL_LOG("Created descriptor for event: %s", mLabel) + mPerfEventFd = static_cast(fd); + } + + ~RaplProfilerCount() { + if (ValidPerfEventFd()) { + ioctl(mPerfEventFd, PERF_EVENT_IOC_DISABLE, 0); + close(mPerfEventFd); + } + } + + RaplProfilerCount(const RaplProfilerCount&) = delete; + RaplProfilerCount& operator=(const RaplProfilerCount&) = delete; + + CountSample Sample() override { + CountSample result = { + .count = 0, + .number = 0, + .isSampleNew = false, + }; + mozilla::Maybe raplEventResult = ReadEventFd(); + + if (raplEventResult.isNothing()) { + return result; + } + + // We need to return picowatthour to be consistent with the Windows + // EMI API. As a result, the scale calculation should: + // + // - Convert the returned value to nanojoules + // - Convert nanojoules to picowatthour + double nanojoules = + static_cast(raplEventResult.value()) * mEventScale; + double picowatthours = nanojoules / SCALE_NANOJOULES_TO_PICOWATTHOUR; + RAPL_LOG("Sample %s { count: %lu, last-result: %lu } = %lfJ", mLabel, + raplEventResult.value(), mLastResult, nanojoules * 1e-9); + + result.count = static_cast(picowatthours); + + // If the tick count is the same as the returned value or if this is the + // first sample, treat this sample as a duplicate. 
+ result.isSampleNew = + (mLastResult != 0 && mLastResult != raplEventResult.value() && + result.count >= 0); + mLastResult = raplEventResult.value(); + + return result; + } + + bool ValidPerfEventFd() { return mPerfEventFd >= 0; } + + private: + mozilla::Maybe ReadEventFd() { + MOZ_ASSERT(ValidPerfEventFd()); + + uint64_t eventResult; + ssize_t readBytes = read(mPerfEventFd, &eventResult, sizeof(uint64_t)); + if (readBytes != sizeof(uint64_t)) { + RAPL_LOG("Invalid RAPL event read size: %ld", readBytes); + return mozilla::Nothing(); + } + + return mozilla::Some(eventResult); + } + + uint64_t mLastResult; + int mPerfEventFd; + double mEventScale; +}; + +static int GetRaplPerfEventType() { + FILE* fp = fopen(SYSFS_PERF_POWER_TYPE_PATH, "r"); + if (!fp) { + RAPL_LOG("Open of " SYSFS_PERF_POWER_TYPE_PATH " failed"); + return -1; + } + + int readTypeValue = -1; + if (fscanf(fp, "%d", &readTypeValue) != 1) { + RAPL_LOG("Read of " SYSFS_PERF_POWER_TYPE_PATH " failed"); + } + fclose(fp); + + return readTypeValue; +} + +PowerCounters::PowerCounters() { + if (!XRE_IsParentProcess()) { + // Energy meters are global, so only sample them on the parent. + return; + } + + // Get the value perf_event_attr.type should be set to for RAPL + // perf events. 
+ int perfEventType = GetRaplPerfEventType(); + if (perfEventType < 0) { + RAPL_LOG("Failed to find the event type for RAPL perf events."); + return; + } + + for (const auto& raplEventDomain : kSupportedRaplDomains) { + RaplProfilerCount* raplEvent = new RaplProfilerCount( + perfEventType, raplEventDomain.mRaplEventType, raplEventDomain.mLabel, + raplEventDomain.mDescription); + if (!raplEvent->ValidPerfEventFd() || !mCounters.emplaceBack(raplEvent)) { + delete raplEvent; + } + } +} + +PowerCounters::~PowerCounters() { + for (auto* raplEvent : mCounters) { + delete raplEvent; + } + mCounters.clear(); +} + +void PowerCounters::Sample() {} diff --git a/tools/profiler/core/PowerCounters-mac-amd64.cpp b/tools/profiler/core/PowerCounters-mac-amd64.cpp new file mode 100644 index 0000000000..540cee155d --- /dev/null +++ b/tools/profiler/core/PowerCounters-mac-amd64.cpp @@ -0,0 +1,419 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "PowerCounters.h" +#include "nsDebug.h" +#include "nsPrintfCString.h" +#include "nsXULAppAPI.h" // for XRE_IsParentProcess + +// Because of the pkg_energy_statistics_t::pkes_version check below, the +// earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72). + +#include +#include + +// OS X has four kinds of system calls: +// +// 1. Mach traps; +// 2. UNIX system calls; +// 3. machine-dependent calls; +// 4. diagnostic calls. +// +// (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.) +// +// The last category has a single call named diagCall() or diagCall64(). Its +// mode is controlled by its first argument, and one of the modes allows access +// to the Intel RAPL MSRs. +// +// The interface to diagCall64() is not exported, so we have to import some +// definitions from the XNU kernel. 
All imported definitions are annotated with +// the XNU source file they come from, and information about what XNU versions +// they were introduced in and (if relevant) modified. + +// The diagCall64() mode. +// From osfmk/i386/Diagnostics.h +// - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value +// 17 was used for dgGzallocTest.) +#define dgPowerStat 17 + +// From osfmk/i386/cpu_data.h +// - In 10.8.5 these values were introduced, along with core_energy_stat_t. +#define CPU_RTIME_BINS (12) +#define CPU_ITIME_BINS (CPU_RTIME_BINS) + +// core_energy_stat_t and pkg_energy_statistics_t are both from +// osfmk/i386/Diagnostics.c. +// - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many +// fewer fields. +// - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with +// numerous new fields. +// - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added. +// diagCall64(dgPowerStat) fills it with '1' in all versions since (up to +// 10.10.2 at time of writing). +// - in 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally +// added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the +// source code, but it could be defined at compile-time via compiler flags.) +// pkg_energy_statistics_t::pkes_version did not change, though. + +typedef struct { + uint64_t caperf; + uint64_t cmperf; + uint64_t ccres[6]; + uint64_t crtimes[CPU_RTIME_BINS]; + uint64_t citimes[CPU_ITIME_BINS]; + uint64_t crtime_total; + uint64_t citime_total; + uint64_t cpu_idle_exits; + uint64_t cpu_insns; + uint64_t cpu_ucc; + uint64_t cpu_urc; +#if DIAG_ALL_PMCS // Added in 10.10.2 (xnu-2782.10.72). + uint64_t gpmcs[4]; // Added in 10.10.2 (xnu-2782.10.72). +#endif /* DIAG_ALL_PMCS */ // Added in 10.10.2 (xnu-2782.10.72). +} core_energy_stat_t; + +typedef struct { + uint64_t pkes_version; // Added in 10.9.0 (xnu-2422.1.72). 
+ uint64_t pkg_cres[2][7]; + + // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT + // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT. + uint64_t pkg_power_unit; + + // These are the four fields for the four RAPL domains. For each field + // we list: + // + // - the corresponding MSR number; + // - Intel's name for that MSR; + // - XNU's name for that MSR; + // - which Intel processors the MSR is supported on. + // + // The last of these is determined from chapter 35 of Volume 3 of the + // "Intel 64 and IA-32 Architecture's Software Developer's Manual", + // Order Number 325384. (Note that chapter 35 contradicts section 14.9 + // to some degree.) + + // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS + // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). + uint64_t pkg_energy; + + // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS + // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). + uint64_t pp0_energy; + + // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS + // Sandy Bridge, Haswell. + uint64_t pp1_energy; + + // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS + // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model + // 0x57) + uint64_t ddr_energy; + + uint64_t llc_flushed_cycles; + uint64_t ring_ratio_instantaneous; + uint64_t IA_frequency_clipping_cause; + uint64_t GT_frequency_clipping_cause; + uint64_t pkg_idle_exits; + uint64_t pkg_rtimes[CPU_RTIME_BINS]; + uint64_t pkg_itimes[CPU_ITIME_BINS]; + uint64_t mbus_delay_time; + uint64_t mint_delay_time; + uint32_t ncpus; + core_energy_stat_t cest[]; +} pkg_energy_statistics_t; + +static int diagCall64(uint64_t aMode, void* aBuf) { + // We cannot use syscall() here because it doesn't work with diagnostic + // system calls -- it raises SIGSYS if you try. So we have to use asm. + +#ifdef __x86_64__ + // The 0x40000 prefix indicates it's a diagnostic system call. 
The 0x01 + // suffix indicates the syscall number is 1, which also happens to be the + // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more + // details. + static const uint64_t diagCallNum = 0x4000001; + uint64_t rv; + + __asm__ __volatile__( + "syscall" + + // Return value goes in "a" (%rax). + : /* outputs */ "=a"(rv) + + // The syscall number goes in "0", a synonym (from outputs) for "a" + // (%rax). The syscall arguments go in "D" (%rdi) and "S" (%rsi). + : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf) + + // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And + // this particular syscall also writes memory (aBuf). + : /* clobbers */ "rcx", "r11", "cc", "memory"); + return rv; +#else +# error Sorry, only x86-64 is supported +#endif +} + +// This is a counter to collect power utilization during profiling. +// It cannot be a raw `ProfilerCounter` because we need to manually add/remove +// it while the profiler lock is already held. +class RaplDomain final : public BaseProfilerCount { + public: + explicit RaplDomain(const char* aLabel, const char* aDescription) + : BaseProfilerCount(aLabel, nullptr, nullptr, "power", aDescription), + mSample(0), + mEnergyStatusUnits(0), + mWrapAroundCount(0), + mIsSampleNew(false) {} + + CountSample Sample() override { + CountSample result; + + // To be consistent with the Windows EMI API, + // return values in picowatt-hour. 
+ constexpr double NANOJOULES_PER_JOULE = 1'000'000'000; + constexpr double NANOJOULES_TO_PICOWATTHOUR = 3.6; + + uint64_t ticks = (uint64_t(mWrapAroundCount) << 32) + mSample; + double joulesPerTick = (double)1 / (1 << mEnergyStatusUnits); + result.count = static_cast(ticks) * joulesPerTick * + NANOJOULES_PER_JOULE / NANOJOULES_TO_PICOWATTHOUR; + + result.number = 0; + result.isSampleNew = mIsSampleNew; + mIsSampleNew = false; + return result; + } + + void AddSample(uint32_t aSample, uint32_t aEnergyStatusUnits) { + if (aSample == mSample) { + return; + } + + mEnergyStatusUnits = aEnergyStatusUnits; + + if (aSample > mSample) { + mIsSampleNew = true; + mSample = aSample; + return; + } + + // Despite being returned in uint64_t fields, the power counter values + // only use the lowest 32 bits of their fields, and we need to handle + // wraparounds to avoid our power tracks stopping after a few hours. + constexpr uint32_t highestBit = 1 << 31; + if ((mSample & highestBit) && !(aSample & highestBit)) { + mIsSampleNew = true; + ++mWrapAroundCount; + mSample = aSample; + } else { + NS_WARNING("unexpected sample with smaller value"); + } + } + + private: + uint32_t mSample; + uint32_t mEnergyStatusUnits; + uint32_t mWrapAroundCount; + bool mIsSampleNew; +}; + +class RAPL { + bool mIsGpuSupported; // Is the GPU domain supported by the processor? + bool mIsRamSupported; // Is the RAM domain supported by the processor? + + // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J == + // 15.3 microJoules) which is different to the power unit MSR. (See the + // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of + // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.) + // This field records whether the quirk is present. + bool mHasRamUnitsQuirk; + + // The abovementioned 15.3 microJoules value. (2^16 = 65536) + static constexpr double kQuirkyRamEnergyStatusUnits = 16; + + // The struct passed to diagCall64(). 
+ pkg_energy_statistics_t* mPkes; + + RaplDomain* mPkg = nullptr; + RaplDomain* mCores = nullptr; + RaplDomain* mGpu = nullptr; + RaplDomain* mRam = nullptr; + + public: + explicit RAPL(PowerCounters::CountVector& aCounters) + : mHasRamUnitsQuirk(false) { + // Work out which RAPL MSRs this CPU model supports. + int cpuModel; + size_t size = sizeof(cpuModel); + if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL, 0) != 0) { + NS_WARNING("sysctlbyname(\"machdep.cpu.model\") failed"); + return; + } + + // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in + // linux-4.1.5/. + // + // By linux-5.6.14/, this stuff had moved into + // arch/x86/events/intel/rapl.c, which references processor families in + // arch/x86/include/asm/intel-family.h. + switch (cpuModel) { + case 0x2a: // Sandy Bridge + case 0x3a: // Ivy Bridge + // Supports package, cores, GPU. + mIsGpuSupported = true; + mIsRamSupported = false; + break; + + case 0x3f: // Haswell X + case 0x4f: // Broadwell X + case 0x55: // Skylake X + case 0x56: // Broadwell D + // Supports package, cores, RAM. Has the units quirk. + mIsGpuSupported = false; + mIsRamSupported = true; + mHasRamUnitsQuirk = true; + break; + + case 0x2d: // Sandy Bridge X + case 0x3e: // Ivy Bridge X + // Supports package, cores, RAM. + mIsGpuSupported = false; + mIsRamSupported = true; + break; + + case 0x3c: // Haswell + case 0x3d: // Broadwell + case 0x45: // Haswell L + case 0x46: // Haswell G + case 0x47: // Broadwell G + // Supports package, cores, GPU, RAM. + mIsGpuSupported = true; + mIsRamSupported = true; + break; + + case 0x4e: // Skylake L + case 0x5e: // Skylake + case 0x8e: // Kaby Lake L + case 0x9e: // Kaby Lake + case 0x66: // Cannon Lake L + case 0x7d: // Ice Lake + case 0x7e: // Ice Lake L + case 0xa5: // Comet Lake + case 0xa6: // Comet Lake L + // Supports package, cores, GPU, RAM, PSYS. + // XXX: this tool currently doesn't measure PSYS. 
+ mIsGpuSupported = true; + mIsRamSupported = true; + break; + + default: + NS_WARNING(nsPrintfCString("unknown CPU model: %d", cpuModel).get()); + return; + } + + // Get the maximum number of logical CPUs so that we know how big to make + // |mPkes|. + int logicalcpu_max; + size = sizeof(logicalcpu_max); + if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL, 0) != + 0) { + NS_WARNING("sysctlbyname(\"hw.logicalcpu_max\") failed"); + return; + } + + // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around + // core_energy_stat_t::gpmcs and for any other future extensions to that + // struct. (The fields we read all come before the core_energy_stat_t + // array, so it won't matter to us whether gpmcs is present or not.) + size_t pkesSize = sizeof(pkg_energy_statistics_t) + + logicalcpu_max * sizeof(core_energy_stat_t) + + logicalcpu_max * 1024; + mPkes = (pkg_energy_statistics_t*)malloc(pkesSize); + if (mPkes && aCounters.reserve(4)) { + mPkg = new RaplDomain("Power: CPU package", "RAPL PKG"); + aCounters.infallibleAppend(mPkg); + + mCores = new RaplDomain("Power: CPU cores", "RAPL PP0"); + aCounters.infallibleAppend(mCores); + + if (mIsGpuSupported) { + mGpu = new RaplDomain("Power: iGPU", "RAPL PP1"); + aCounters.infallibleAppend(mGpu); + } + + if (mIsRamSupported) { + mRam = new RaplDomain("Power: DRAM", "RAPL DRAM"); + aCounters.infallibleAppend(mRam); + } + } + } + + ~RAPL() { + free(mPkes); + delete mPkg; + delete mCores; + delete mGpu; + delete mRam; + } + + void Sample() { + constexpr uint64_t kSupportedVersion = 1; + + // If we failed to allocate the memory for package energy statistics, we + // have nothing to sample. + if (MOZ_UNLIKELY(!mPkes)) { + return; + } + + // Write an unsupported version number into pkes_version so that the check + // below cannot succeed by dumb luck. 
+ mPkes->pkes_version = kSupportedVersion - 1; + + // diagCall64() returns 1 on success, and 0 on failure (which can only + // happen if the mode is unrecognized, e.g. in 10.7.x or earlier versions). + if (diagCall64(dgPowerStat, mPkes) != 1) { + NS_WARNING("diagCall64() failed"); + return; + } + + if (mPkes->pkes_version != kSupportedVersion) { + NS_WARNING( + nsPrintfCString("unexpected pkes_version: %llu", mPkes->pkes_version) + .get()); + return; + } + + // Bits 12:8 are the ESU. + // Energy measurements come in multiples of 1/(2^ESU). + uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f; + mPkg->AddSample(mPkes->pkg_energy, energyStatusUnits); + mCores->AddSample(mPkes->pp0_energy, energyStatusUnits); + if (mIsGpuSupported) { + mGpu->AddSample(mPkes->pp1_energy, energyStatusUnits); + } + if (mIsRamSupported) { + mRam->AddSample(mPkes->ddr_energy, mHasRamUnitsQuirk + ? kQuirkyRamEnergyStatusUnits + : energyStatusUnits); + } + } +}; + +PowerCounters::PowerCounters() { + // RAPL values are global, so only sample them on the parent. + mRapl = XRE_IsParentProcess() ? new RAPL(mCounters) : nullptr; +} + +PowerCounters::~PowerCounters() { + mCounters.clear(); + delete mRapl; + mRapl = nullptr; +} + +void PowerCounters::Sample() { + if (mRapl) { + mRapl->Sample(); + } +} diff --git a/tools/profiler/core/PowerCounters-mac-arm64.cpp b/tools/profiler/core/PowerCounters-mac-arm64.cpp new file mode 100644 index 0000000000..3a84a479ef --- /dev/null +++ b/tools/profiler/core/PowerCounters-mac-arm64.cpp @@ -0,0 +1,47 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "PowerCounters.h" + +#include + +class ProcessPower final : public BaseProfilerCount { + public: + ProcessPower() + : BaseProfilerCount("Process Power", nullptr, nullptr, "power", + "Power utilization") {} + + CountSample Sample() override { + CountSample result; + result.count = GetTaskEnergy(); + result.number = 0; + result.isSampleNew = true; + return result; + } + + private: + int64_t GetTaskEnergy() { + task_power_info_v2_data_t task_power_info; + mach_msg_type_number_t count = TASK_POWER_INFO_V2_COUNT; + kern_return_t kr = task_info(mach_task_self(), TASK_POWER_INFO_V2, + (task_info_t)&task_power_info, &count); + if (kr != KERN_SUCCESS) { + return 0; + } + + // task_energy is in nanojoules. To be consistent with the Windows EMI + // API, return values in picowatt-hour. + return task_power_info.task_energy / 3.6; + } +}; + +PowerCounters::PowerCounters() : mProcessPower(new ProcessPower()) { + if (mProcessPower) { + (void)mCounters.append(mProcessPower.get()); + } +} + +PowerCounters::~PowerCounters() { mCounters.clear(); } + +void PowerCounters::Sample() {} diff --git a/tools/profiler/core/PowerCounters-win.cpp b/tools/profiler/core/PowerCounters-win.cpp new file mode 100644 index 0000000000..f1d05389b6 --- /dev/null +++ b/tools/profiler/core/PowerCounters-win.cpp @@ -0,0 +1,342 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "PowerCounters.h" +#include "nsXULAppAPI.h" // for XRE_IsParentProcess +#include "nsString.h" + +#include +#include +#include // for SetupDi* +// LogSeverity, defined by setupapi.h to DWORD, messes with other code. +#undef LogSeverity + +#undef NTDDI_VERSION +#define NTDDI_VERSION NTDDI_WINBLUE +#include + +#ifndef NTDDI_WIN10_RS5 +// EMI v2 API exists in SDK 10.0.17763 (Windows 10 1809 / Redstone 5) and later. 
+// Our build machines are still on SDK 10.0.17134. +// Remove this block when updating the SDK (bug 1774628). +typedef EMI_METADATA EMI_METADATA_V1; +typedef EMI_MEASUREMENT_DATA EMI_CHANNEL_MEASUREMENT_DATA; +# define EMI_VERSION_V2 2 + +typedef struct { + EMI_MEASUREMENT_UNIT MeasurementUnit; + USHORT ChannelNameSize; + WCHAR ChannelName[ANYSIZE_ARRAY]; +} EMI_CHANNEL_V2; + +typedef struct { + WCHAR HardwareOEM[EMI_NAME_MAX]; + WCHAR HardwareModel[EMI_NAME_MAX]; + USHORT HardwareRevision; + USHORT ChannelCount; + EMI_CHANNEL_V2 Channels[ANYSIZE_ARRAY]; +} EMI_METADATA_V2; + +# define EMI_CHANNEL_V2_LENGTH(_ChannelNameSize) \ + (FIELD_OFFSET(EMI_CHANNEL_V2, ChannelName) + (_ChannelNameSize)) + +# define EMI_CHANNEL_V2_NEXT_CHANNEL(_Channel) \ + ((EMI_CHANNEL_V2*)((PUCHAR)(_Channel) + \ + EMI_CHANNEL_V2_LENGTH((_Channel)->ChannelNameSize))) +#endif + +using namespace mozilla; + +// This is a counter to collect power utilization during profiling. +// It cannot be a raw `ProfilerCounter` because we need to manually add/remove +// it while the profiler lock is already held. 
+class PowerMeterChannel final : public BaseProfilerCount { + public: + explicit PowerMeterChannel(const WCHAR* aChannelName, ULONGLONG aInitialValue, + ULONGLONG aInitialTime) + : BaseProfilerCount(nullptr, nullptr, nullptr, "power", + "Power utilization"), + mChannelName(NS_ConvertUTF16toUTF8(aChannelName)), + mPreviousValue(aInitialValue), + mPreviousTime(aInitialTime), + mIsSampleNew(true) { + if (mChannelName.Equals("RAPL_Package0_PKG")) { + mLabel = "Power: CPU package"; + mDescription = mChannelName.get(); + } else if (mChannelName.Equals("RAPL_Package0_PP0")) { + mLabel = "Power: CPU cores"; + mDescription = mChannelName.get(); + } else if (mChannelName.Equals("RAPL_Package0_PP1")) { + mLabel = "Power: iGPU"; + mDescription = mChannelName.get(); + } else if (mChannelName.Equals("RAPL_Package0_DRAM")) { + mLabel = "Power: DRAM"; + mDescription = mChannelName.get(); + } else { + unsigned int coreId; + if (sscanf(mChannelName.get(), "RAPL_Package0_Core%u_CORE", &coreId) == + 1) { + mLabelString = "Power: CPU core "; + mLabelString.AppendInt(coreId); + mLabel = mLabelString.get(); + mDescription = mChannelName.get(); + } else { + mLabel = mChannelName.get(); + } + } + } + + CountSample Sample() override { + CountSample result; + result.count = mCounter; + result.number = 0; + result.isSampleNew = mIsSampleNew; + mIsSampleNew = false; + return result; + } + + void AddSample(ULONGLONG aAbsoluteEnergy, ULONGLONG aAbsoluteTime) { + // aAbsoluteTime is the time since the system start in 100ns increments. + if (aAbsoluteTime == mPreviousTime) { + return; + } + + if (aAbsoluteEnergy > mPreviousValue) { + int64_t increment = aAbsoluteEnergy - mPreviousValue; + mCounter += increment; + mPreviousValue += increment; + mPreviousTime = aAbsoluteTime; + } + + mIsSampleNew = true; + } + + private: + int64_t mCounter; + nsCString mChannelName; + + // Used as a storage when the label can not be a literal string. 
+ nsCString mLabelString; + + ULONGLONG mPreviousValue; + ULONGLONG mPreviousTime; + bool mIsSampleNew; +}; + +class PowerMeterDevice { + public: + explicit PowerMeterDevice(LPCTSTR aDevicePath) { + mHandle = ::CreateFile(aDevicePath, GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (mHandle == INVALID_HANDLE_VALUE) { + return; + } + + EMI_VERSION version = {0}; + DWORD dwOut; + + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_VERSION, nullptr, 0, &version, + sizeof(version), &dwOut, nullptr) || + (version.EmiVersion != EMI_VERSION_V1 && + version.EmiVersion != EMI_VERSION_V2)) { + return; + } + + EMI_METADATA_SIZE size = {0}; + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA_SIZE, nullptr, 0, + &size, sizeof(size), &dwOut, nullptr) || + !size.MetadataSize) { + return; + } + + UniquePtr metadata(new (std::nothrow) + uint8_t[size.MetadataSize]); + if (!metadata) { + return; + } + + if (version.EmiVersion == EMI_VERSION_V2) { + EMI_METADATA_V2* metadata2 = + reinterpret_cast(metadata.get()); + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA, nullptr, 0, + metadata2, size.MetadataSize, &dwOut, nullptr)) { + return; + } + + if (!mChannels.reserve(metadata2->ChannelCount)) { + return; + } + + mDataBuffer = + MakeUnique(metadata2->ChannelCount); + if (!mDataBuffer) { + return; + } + + if (!::DeviceIoControl( + mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(), + sizeof(EMI_CHANNEL_MEASUREMENT_DATA[metadata2->ChannelCount]), + &dwOut, nullptr)) { + return; + } + + EMI_CHANNEL_V2* channel = &metadata2->Channels[0]; + for (int i = 0; i < metadata2->ChannelCount; ++i) { + EMI_CHANNEL_MEASUREMENT_DATA* channel_data = &mDataBuffer[i]; + mChannels.infallibleAppend(new PowerMeterChannel( + channel->ChannelName, channel_data->AbsoluteEnergy, + channel_data->AbsoluteTime)); + channel = EMI_CHANNEL_V2_NEXT_CHANNEL(channel); + } + } else if (version.EmiVersion == EMI_VERSION_V1) { + 
EMI_METADATA_V1* metadata1 = + reinterpret_cast(metadata.get()); + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA, nullptr, 0, + metadata1, size.MetadataSize, &dwOut, nullptr)) { + return; + } + + mDataBuffer = MakeUnique(1); + if (!mDataBuffer) { + return; + } + + if (!::DeviceIoControl( + mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(), + sizeof(EMI_CHANNEL_MEASUREMENT_DATA), &dwOut, nullptr)) { + return; + } + + (void)mChannels.append(new PowerMeterChannel( + metadata1->MeteredHardwareName, mDataBuffer[0].AbsoluteEnergy, + mDataBuffer[0].AbsoluteTime)); + } + } + + ~PowerMeterDevice() { + if (mHandle != INVALID_HANDLE_VALUE) { + ::CloseHandle(mHandle); + } + } + + void Sample() { + MOZ_ASSERT(HasChannels()); + MOZ_ASSERT(mDataBuffer); + + DWORD dwOut; + if (!::DeviceIoControl( + mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(), + sizeof(EMI_CHANNEL_MEASUREMENT_DATA[mChannels.length()]), &dwOut, + nullptr)) { + return; + } + + for (size_t i = 0; i < mChannels.length(); ++i) { + EMI_CHANNEL_MEASUREMENT_DATA* channel_data = &mDataBuffer[i]; + mChannels[i]->AddSample(channel_data->AbsoluteEnergy, + channel_data->AbsoluteTime); + } + } + + bool HasChannels() { return mChannels.length() != 0; } + void AppendCountersTo(PowerCounters::CountVector& aCounters) { + if (aCounters.reserve(aCounters.length() + mChannels.length())) { + for (auto& channel : mChannels) { + aCounters.infallibleAppend(channel.get()); + } + } + } + + private: + Vector, 4> mChannels; + HANDLE mHandle = INVALID_HANDLE_VALUE; + UniquePtr mDataBuffer; +}; + +PowerCounters::PowerCounters() { + class MOZ_STACK_CLASS HDevInfoHolder final { + public: + explicit HDevInfoHolder(HDEVINFO aHandle) : mHandle(aHandle) {} + + ~HDevInfoHolder() { ::SetupDiDestroyDeviceInfoList(mHandle); } + + private: + HDEVINFO mHandle; + }; + + if (!XRE_IsParentProcess()) { + // Energy meters are global, so only sample them on the parent. 
+ return; + } + + // Energy Metering Device Interface + // {45BD8344-7ED6-49cf-A440-C276C933B053} + // + // Using GUID_DEVICE_ENERGY_METER does not compile as the symbol does not + // exist before Windows 10. + GUID my_GUID_DEVICE_ENERGY_METER = { + 0x45bd8344, + 0x7ed6, + 0x49cf, + {0xa4, 0x40, 0xc2, 0x76, 0xc9, 0x33, 0xb0, 0x53}}; + + HDEVINFO hdev = + ::SetupDiGetClassDevs(&my_GUID_DEVICE_ENERGY_METER, nullptr, nullptr, + DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); + if (hdev == INVALID_HANDLE_VALUE) { + return; + } + + HDevInfoHolder hdevHolder(hdev); + + DWORD i = 0; + SP_DEVICE_INTERFACE_DATA did = {0}; + did.cbSize = sizeof(did); + + while (::SetupDiEnumDeviceInterfaces( + hdev, nullptr, &my_GUID_DEVICE_ENERGY_METER, i++, &did)) { + DWORD bufferSize = 0; + ::SetupDiGetDeviceInterfaceDetail(hdev, &did, nullptr, 0, &bufferSize, + nullptr); + if (::GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + continue; + } + + UniquePtr buffer(new (std::nothrow) uint8_t[bufferSize]); + if (!buffer) { + continue; + } + + PSP_DEVICE_INTERFACE_DETAIL_DATA pdidd = + reinterpret_cast(buffer.get()); + MOZ_ASSERT(uintptr_t(buffer.get()) % + alignof(PSP_DEVICE_INTERFACE_DETAIL_DATA) == + 0); + pdidd->cbSize = sizeof(*pdidd); + if (!::SetupDiGetDeviceInterfaceDetail(hdev, &did, pdidd, bufferSize, + &bufferSize, nullptr)) { + continue; + } + + UniquePtr pmd = + MakeUnique(pdidd->DevicePath); + if (!pmd->HasChannels() || + !mPowerMeterDevices.emplaceBack(std::move(pmd))) { + NS_WARNING("PowerMeterDevice without measurement channel (or OOM)"); + } + } + + for (auto& device : mPowerMeterDevices) { + device->AppendCountersTo(mCounters); + } +} + +PowerCounters::~PowerCounters() { mCounters.clear(); } + +void PowerCounters::Sample() { + for (auto& device : mPowerMeterDevices) { + device->Sample(); + } +} diff --git a/tools/profiler/core/PowerCounters.h b/tools/profiler/core/PowerCounters.h new file mode 100644 index 0000000000..2fd8d5892c --- /dev/null +++ 
b/tools/profiler/core/PowerCounters.h @@ -0,0 +1,52 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TOOLS_POWERCOUNTERS_H_ +#define TOOLS_POWERCOUNTERS_H_ + +#include "PlatformMacros.h" +#include "mozilla/ProfilerCounts.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" + +#if defined(_MSC_VER) +class PowerMeterDevice; +#endif +#if defined(GP_PLAT_arm64_darwin) +class ProcessPower; +#endif +#if defined(GP_PLAT_amd64_darwin) +class RAPL; +#endif + +class PowerCounters { + public: +#if defined(_MSC_VER) || defined(GP_OS_darwin) || defined(GP_PLAT_amd64_linux) + explicit PowerCounters(); + ~PowerCounters(); + void Sample(); +#else + explicit PowerCounters(){}; + ~PowerCounters(){}; + void Sample(){}; +#endif + + using CountVector = mozilla::Vector; + const CountVector& GetCounters() { return mCounters; } + + private: + CountVector mCounters; + +#if defined(_MSC_VER) + mozilla::Vector> mPowerMeterDevices; +#endif +#if defined(GP_PLAT_arm64_darwin) + mozilla::UniquePtr mProcessPower; +#endif +#if defined(GP_PLAT_amd64_darwin) + RAPL* mRapl; +#endif +}; + +#endif /* ndef TOOLS_POWERCOUNTERS_H_ */ diff --git a/tools/profiler/core/ProfileAdditionalInformation.cpp b/tools/profiler/core/ProfileAdditionalInformation.cpp new file mode 100644 index 0000000000..ba3cd80e7c --- /dev/null +++ b/tools/profiler/core/ProfileAdditionalInformation.cpp @@ -0,0 +1,102 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfileAdditionalInformation.h" + +#include "jsapi.h" +#include "js/JSON.h" +#include "js/PropertyAndElement.h" +#include "js/Value.h" +#include "mozilla/JSONStringWriteFuncs.h" +#include "mozilla/ipc/IPDLParamTraits.h" + +#ifdef MOZ_GECKO_PROFILER +# include "platform.h" + +void mozilla::ProfileGenerationAdditionalInformation::ToJSValue( + JSContext* aCx, JS::MutableHandle aRetVal) const { + // Get the shared libraries array. + JS::Rooted sharedLibrariesVal(aCx); + { + JSONStringWriteFunc buffer; + JSONWriter w(buffer, JSONWriter::SingleLineStyle); + w.StartArrayElement(); + AppendSharedLibraries(w, mSharedLibraries); + w.EndArray(); + NS_ConvertUTF8toUTF16 buffer16(buffer.StringCRef()); + MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx, + static_cast(buffer16.get()), + buffer16.Length(), &sharedLibrariesVal)); + } + + JS::Rooted additionalInfoObj(aCx, JS_NewPlainObject(aCx)); + JS_SetProperty(aCx, additionalInfoObj, "sharedLibraries", sharedLibrariesVal); + aRetVal.setObject(*additionalInfoObj); +} +#endif // MOZ_GECKO_PROFILER + +namespace IPC { + +#ifdef MOZ_GECKO_PROFILER +void IPC::ParamTraits::Write(MessageWriter* aWriter, + const paramType& aParam) { + WriteParam(aWriter, aParam.mStart); + WriteParam(aWriter, aParam.mEnd); + WriteParam(aWriter, aParam.mOffset); + WriteParam(aWriter, aParam.mBreakpadId); + WriteParam(aWriter, aParam.mCodeId); + WriteParam(aWriter, aParam.mModuleName); + WriteParam(aWriter, aParam.mModulePath); + WriteParam(aWriter, aParam.mDebugName); + WriteParam(aWriter, aParam.mDebugPath); + WriteParam(aWriter, aParam.mVersion); + WriteParam(aWriter, aParam.mArch); +} + +bool IPC::ParamTraits::Read(MessageReader* aReader, + paramType* aResult) { + return ReadParam(aReader, &aResult->mStart) && + ReadParam(aReader, &aResult->mEnd) && + ReadParam(aReader, &aResult->mOffset) && + ReadParam(aReader, &aResult->mBreakpadId) && + ReadParam(aReader, &aResult->mCodeId) && + ReadParam(aReader, &aResult->mModuleName) && + ReadParam(aReader, 
&aResult->mModulePath) && + ReadParam(aReader, &aResult->mDebugName) && + ReadParam(aReader, &aResult->mDebugPath) && + ReadParam(aReader, &aResult->mVersion) && + ReadParam(aReader, &aResult->mArch); +} + +void IPC::ParamTraits::Write(MessageWriter* aWriter, + const paramType& aParam) { + paramType& p = const_cast(aParam); + WriteParam(aWriter, p.mEntries); +} + +bool IPC::ParamTraits::Read(MessageReader* aReader, + paramType* aResult) { + return ReadParam(aReader, &aResult->mEntries); +} +#endif // MOZ_GECKO_PROFILER + +void IPC::ParamTraits::Write( + MessageWriter* aWriter, const paramType& aParam) { +#ifdef MOZ_GECKO_PROFILER + WriteParam(aWriter, aParam.mSharedLibraries); +#endif // MOZ_GECKO_PROFILER +} + +bool IPC::ParamTraits::Read( + MessageReader* aReader, paramType* aResult) { +#ifdef MOZ_GECKO_PROFILER + return ReadParam(aReader, &aResult->mSharedLibraries); +#else + return true; +#endif // MOZ_GECKO_PROFILER +} + +} // namespace IPC diff --git a/tools/profiler/core/ProfileBuffer.cpp b/tools/profiler/core/ProfileBuffer.cpp new file mode 100644 index 0000000000..170a4f14b4 --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.cpp @@ -0,0 +1,243 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfileBuffer.h" + +#include "BaseProfiler.h" +#include "js/GCAPI.h" +#include "jsfriendapi.h" +#include "mozilla/MathAlgorithms.h" +#include "nsJSPrincipals.h" +#include "nsScriptSecurityManager.h" + +using namespace mozilla; + +ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer) + : mEntries(aBuffer) { + // Assume the given buffer is in-session. 
+ MOZ_ASSERT(mEntries.IsInSession()); +} + +/* static */ +ProfileBufferBlockIndex ProfileBuffer::AddEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, + const ProfileBufferEntry& aEntry) { + switch (aEntry.GetKind()) { +#define SWITCH_KIND(KIND, TYPE, SIZE) \ + case ProfileBufferEntry::Kind::KIND: { \ + return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \ + } + + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND) + +#undef SWITCH_KIND + default: + MOZ_ASSERT(false, "Unhandled ProfilerBuffer entry KIND"); + return ProfileBufferBlockIndex{}; + } +} + +// Called from signal, call only reentrant functions +uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) { + return AddEntry(mEntries, aEntry).ConvertToProfileBufferIndex(); +} + +/* static */ +ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, ProfilerThreadId aThreadId) { + return AddEntry(aProfileChunkedBuffer, + ProfileBufferEntry::ThreadId(aThreadId)); +} + +uint64_t ProfileBuffer::AddThreadIdEntry(ProfilerThreadId aThreadId) { + return AddThreadIdEntry(mEntries, aThreadId).ConvertToProfileBufferIndex(); +} + +void ProfileBuffer::CollectCodeLocation( + const char* aLabel, const char* aStr, uint32_t aFrameFlags, + uint64_t aInnerWindowID, const Maybe& aLineNumber, + const Maybe& aColumnNumber, + const Maybe& aCategoryPair) { + AddEntry(ProfileBufferEntry::Label(aLabel)); + AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags))); + + if (aStr) { + // Store the string using one or more DynamicStringFragment entries. + size_t strLen = strlen(aStr) + 1; // +1 for the null terminator + // If larger than the prescribed limit, we will cut the string and end it + // with an ellipsis. + const bool tooBig = strLen > kMaxFrameKeyLength; + if (tooBig) { + strLen = kMaxFrameKeyLength; + } + char chars[ProfileBufferEntry::kNumChars]; + for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) { + // Store up to kNumChars characters in the entry. 
+ size_t len = ProfileBufferEntry::kNumChars; + const bool last = j + len >= strLen; + if (last) { + // Only the last entry may be smaller than kNumChars. + len = strLen - j; + if (tooBig) { + // That last entry is part of a too-big string, replace the end + // characters with an ellipsis "...". + len = std::max(len, size_t(4)); + chars[len - 4] = '.'; + chars[len - 3] = '.'; + chars[len - 2] = '.'; + chars[len - 1] = '\0'; + // Make sure the memcpy will not overwrite our ellipsis! + len -= 4; + } + } + memcpy(chars, &aStr[j], len); + AddEntry(ProfileBufferEntry::DynamicStringFragment(chars)); + if (last) { + break; + } + } + } + + if (aInnerWindowID) { + AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID)); + } + + if (aLineNumber) { + AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber)); + } + + if (aColumnNumber) { + AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber)); + } + + if (aCategoryPair.isSome()) { + AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair))); + } +} + +size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - memory pointed to by the elements within mEntries + return mEntries.SizeOfExcludingThis(aMallocSizeOf); +} + +size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); +} + +void ProfileBuffer::CollectOverheadStats(double aSamplingTimeMs, + TimeDuration aLocking, + TimeDuration aCleaning, + TimeDuration aCounters, + TimeDuration aThreads) { + double timeUs = aSamplingTimeMs * 1000.0; + if (mFirstSamplingTimeUs == 0.0) { + mFirstSamplingTimeUs = timeUs; + } else { + // Note that we'll have 1 fewer interval than other numbers (because + // we need both ends of an interval to know its duration). The final + // difference should be insignificant over the expected many thousands + // of iterations. 
+ mIntervalsUs.Count(timeUs - mLastSamplingTimeUs); + } + mLastSamplingTimeUs = timeUs; + double locking = aLocking.ToMilliseconds() * 1000.0; + double cleaning = aCleaning.ToMilliseconds() * 1000.0; + double counters = aCounters.ToMilliseconds() * 1000.0; + double threads = aThreads.ToMilliseconds() * 1000.0; + + mOverheadsUs.Count(locking + cleaning + counters + threads); + mLockingsUs.Count(locking); + mCleaningsUs.Count(cleaning); + mCountersUs.Count(counters); + mThreadsUs.Count(threads); + + static const bool sRecordSamplingOverhead = []() { + const char* recordOverheads = getenv("MOZ_PROFILER_RECORD_OVERHEADS"); + return recordOverheads && recordOverheads[0] != '\0'; + }(); + if (sRecordSamplingOverhead) { + AddEntry(ProfileBufferEntry::ProfilerOverheadTime(aSamplingTimeMs)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(locking)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleaning)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(counters)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threads)); + } +} + +ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const { + return {BufferRangeStart(), + BufferRangeEnd(), + static_cast(*mEntries.BufferLength() / + 8), // 8 bytes per entry. 
+ mIntervalsUs, + mOverheadsUs, + mLockingsUs, + mCleaningsUs, + mCountersUs, + mThreadsUs}; +} + +/* ProfileBufferCollector */ + +void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) { + mBuf.AddEntry(ProfileBufferEntry::NativeLeafAddr(aAddr)); +} + +void ProfileBufferCollector::CollectJitReturnAddr(void* aAddr) { + mBuf.AddEntry(ProfileBufferEntry::JitReturnAddr(aAddr)); +} + +void ProfileBufferCollector::CollectWasmFrame(const char* aLabel) { + mBuf.CollectCodeLocation("", aLabel, 0, 0, Nothing(), Nothing(), + Some(JS::ProfilingCategoryPair::JS_Wasm)); +} + +void ProfileBufferCollector::CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(aFrame.isLabelFrame() || + (aFrame.isJsFrame() && !aFrame.isOSRFrame())); + + const char* label = aFrame.label(); + const char* dynamicString = aFrame.dynamicString(); + Maybe line; + Maybe column; + + if (aFrame.isJsFrame()) { + // There are two kinds of JS frames that get pushed onto the ProfilingStack. + // + // - label = "", dynamic string = <non-null string> + // - label = "js::RunScript", dynamic string = nullptr + // + // The line number is only interesting in the first case. + + if (label[0] == '\0') { + MOZ_ASSERT(dynamicString); + + // We call aFrame.script() repeatedly -- rather than storing the result in + // a local variable -- in order to avoid rooting hazards. 
+ if (aFrame.script()) { + if (aFrame.pc()) { + unsigned col = 0; + line = Some(JS_PCToLineNumber(aFrame.script(), aFrame.pc(), &col)); + column = Some(col); + } + } + + } else { + MOZ_ASSERT(strcmp(label, "js::RunScript") == 0 && !dynamicString); + } + } else { + MOZ_ASSERT(aFrame.isLabelFrame()); + } + + mBuf.CollectCodeLocation(label, dynamicString, aFrame.flags(), + aFrame.realmID(), line, column, + Some(aFrame.categoryPair())); +} diff --git a/tools/profiler/core/ProfileBuffer.h b/tools/profiler/core/ProfileBuffer.h new file mode 100644 index 0000000000..5da34909cc --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.h @@ -0,0 +1,260 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_BUFFER_H +#define MOZ_PROFILE_BUFFER_H + +#include "GeckoProfiler.h" +#include "ProfileBufferEntry.h" + +#include "mozilla/Maybe.h" +#include "mozilla/PowerOfTwo.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileChunkedBuffer.h" + +class ProcessStreamingContext; +class RunningTimes; + +// Class storing most profiling data in a ProfileChunkedBuffer. +// +// This class is used as a queue of entries which, after construction, never +// allocates. This makes it safe to use in the profiler's "critical section". +class ProfileBuffer final { + public: + // ProfileBuffer constructor + // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer + // manager. + explicit ProfileBuffer(mozilla::ProfileChunkedBuffer& aBuffer); + + mozilla::ProfileChunkedBuffer& UnderlyingChunkedBuffer() const { + return mEntries; + } + + bool IsThreadSafe() const { return mEntries.IsThreadSafe(); } + + // Add |aEntry| to the buffer, ignoring what kind of entry it is. 
+ uint64_t AddEntry(const ProfileBufferEntry& aEntry); + + // Add to the buffer a sample start (ThreadId) entry for aThreadId. + // Returns the position of the entry. + uint64_t AddThreadIdEntry(ProfilerThreadId aThreadId); + + void CollectCodeLocation( + const char* aLabel, const char* aStr, uint32_t aFrameFlags, + uint64_t aInnerWindowID, const mozilla::Maybe& aLineNumber, + const mozilla::Maybe& aColumnNumber, + const mozilla::Maybe& aCategoryPair); + + // Maximum size of a frameKey string that we'll handle. + static const size_t kMaxFrameKeyLength = 512; + + // Add JIT frame information to aJITFrameInfo for any JitReturnAddr entries + // that are currently in the buffer at or after aRangeStart, in samples + // for the given thread. + void AddJITInfoForRange(uint64_t aRangeStart, ProfilerThreadId aThreadId, + JSContext* aContext, JITFrameInfo& aJITFrameInfo, + mozilla::ProgressLogger aProgressLogger) const; + + // Stream JSON for samples in the buffer to aWriter, using the supplied + // UniqueStacks object. + // Only streams samples for the given thread ID and which were taken at or + // after aSinceTime. If ID is 0, ignore the stored thread ID; this should only + // be used when the buffer contains only one sample. + // aUniqueStacks needs to contain information about any JIT frames that we + // might encounter in the buffer, before this method is called. In other + // words, you need to have called AddJITInfoForRange for every range that + // might contain JIT frame information before calling this method. + // Return the thread ID of the streamed sample(s), or 0. 
+ ProfilerThreadId StreamSamplesToJSON( + SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const; + + void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, + ProfilerThreadId aThreadId, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime, UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const; + + // Stream samples and markers from all threads that `aProcessStreamingContext` + // accepts. + void StreamSamplesAndMarkersToJSON( + ProcessStreamingContext& aProcessStreamingContext, + mozilla::ProgressLogger aProgressLogger) const; + + void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter, + double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const; + void StreamProfilerOverheadToJSON( + SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const; + void StreamCountersToJSON(SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const; + + // Find (via |aLastSample|) the most recent sample for the thread denoted by + // |aThreadId| and clone it, patching in the current time as appropriate. + // Mutate |aLastSample| to point to the newly inserted sample. + // Returns whether duplication was successful. + bool DuplicateLastSample(ProfilerThreadId aThreadId, double aSampleTimeMs, + mozilla::Maybe& aLastSample, + const RunningTimes& aRunningTimes); + + void DiscardSamplesBeforeTime(double aTime); + + // Read an entry in the buffer. 
+ ProfileBufferEntry GetEntry(uint64_t aPosition) const { + return mEntries.ReadAt( + mozilla::ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + aPosition), + [&](mozilla::Maybe&& aMER) { + ProfileBufferEntry entry; + if (aMER.isSome()) { + if (aMER->CurrentBlockIndex().ConvertToProfileBufferIndex() == + aPosition) { + // If we're here, it means `aPosition` pointed at a valid block. + MOZ_RELEASE_ASSERT(aMER->RemainingBytes() <= sizeof(entry)); + aMER->ReadBytes(&entry, aMER->RemainingBytes()); + } else { + // EntryReader at the wrong position, pretend to have read + // everything. + aMER->SetRemainingBytes(0); + } + } + return entry; + }); + } + + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + void CollectOverheadStats(double aSamplingTimeMs, + mozilla::TimeDuration aLocking, + mozilla::TimeDuration aCleaning, + mozilla::TimeDuration aCounters, + mozilla::TimeDuration aThreads); + + ProfilerBufferInfo GetProfilerBufferInfo() const; + + private: + // Add |aEntry| to the provided ProfileChunkedBuffer. + // `static` because it may be used to add an entry to a `ProfileChunkedBuffer` + // that is not attached to a `ProfileBuffer`. + static mozilla::ProfileBufferBlockIndex AddEntry( + mozilla::ProfileChunkedBuffer& aProfileChunkedBuffer, + const ProfileBufferEntry& aEntry); + + // Add a sample start (ThreadId) entry for aThreadId to the provided + // ProfileChunkedBuffer. Returns the position of the entry. + // `static` because it may be used to add an entry to a `ProfileChunkedBuffer` + // that is not attached to a `ProfileBuffer`. + static mozilla::ProfileBufferBlockIndex AddThreadIdEntry( + mozilla::ProfileChunkedBuffer& aProfileChunkedBuffer, + ProfilerThreadId aThreadId); + + // The storage in which this ProfileBuffer stores its entries. 
+ mozilla::ProfileChunkedBuffer& mEntries; + + public: + // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values + // corresponding to the first entry and past the last entry stored in + // `mEntries`. + // + // The returned values are not guaranteed to be stable, because other threads + // may also be accessing the buffer concurrently. But they will always + // increase, and can therefore give an indication of how far these values have + // *at least* reached. In particular: + // - Entries whose index is strictly less than `BufferRangeStart()` have been + // discarded by now, so any related data may also be safely discarded. + // - It is safe to try and read entries at any index strictly less than + // `BufferRangeEnd()` -- but note that these reads may fail by the time you + // request them, as old entries get overwritten by new ones. + uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; } + uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; } + + private: + // Single pre-allocated chunk (to avoid spurious mallocs), used when: + // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize). + // - Adding JIT info. + // - Streaming stacks to JSON. + // Mutable because it's accessed from non-multithreaded const methods. + mutable mozilla::Maybe + mMaybeWorkerChunkManager; + mozilla::ProfileBufferChunkManagerSingle& WorkerChunkManager() const { + if (mMaybeWorkerChunkManager.isNothing()) { + // Only actually allocate it on first use. (Some ProfileBuffers are + // temporary and don't actually need this.) 
+ mMaybeWorkerChunkManager.emplace( + mozilla::ProfileBufferChunk::SizeofChunkMetadata() + + mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize); + } + return *mMaybeWorkerChunkManager; + } + + // GetStreamingParametersForThreadCallback: + // (ProfilerThreadId) -> Maybe + template + ProfilerThreadId DoStreamSamplesAndMarkersToJSON( + mozilla::FailureLatch& aFailureLatch, + GetStreamingParametersForThreadCallback&& + aGetStreamingParametersForThreadCallback, + double aSinceTime, ProcessStreamingContext* aStreamingContextForMarkers, + mozilla::ProgressLogger aProgressLogger) const; + + double mFirstSamplingTimeUs = 0.0; + double mLastSamplingTimeUs = 0.0; + ProfilerStats mIntervalsUs; + ProfilerStats mOverheadsUs; + ProfilerStats mLockingsUs; + ProfilerStats mCleaningsUs; + ProfilerStats mCountersUs; + ProfilerStats mThreadsUs; +}; + +/** + * Helper type used to implement ProfilerStackCollector. This type is used as + * the collector for MergeStacks by ProfileBuffer. It holds a reference to the + * buffer, as well as additional feature flags which are needed to control the + * data collection strategy + */ +class ProfileBufferCollector final : public ProfilerStackCollector { + public: + ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos, + uint64_t aBufferRangeStart) + : mBuf(aBuf), + mSamplePositionInBuffer(aSamplePos), + mBufferRangeStart(aBufferRangeStart) { + MOZ_ASSERT( + mSamplePositionInBuffer >= mBufferRangeStart, + "The sample position should always be after the buffer range start"); + } + + // Position at which the sample starts in the profiler buffer (which may be + // different from the buffer in which the sample data is collected here). + mozilla::Maybe SamplePositionInBuffer() override { + return mozilla::Some(mSamplePositionInBuffer); + } + + // Profiler buffer's range start (which may be different from the buffer in + // which the sample data is collected here). 
+ mozilla::Maybe BufferRangeStart() override { + return mozilla::Some(mBufferRangeStart); + } + + virtual void CollectNativeLeafAddr(void* aAddr) override; + virtual void CollectJitReturnAddr(void* aAddr) override; + virtual void CollectWasmFrame(const char* aLabel) override; + virtual void CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) override; + + private: + ProfileBuffer& mBuf; + uint64_t mSamplePositionInBuffer; + uint64_t mBufferRangeStart; +}; + +#endif diff --git a/tools/profiler/core/ProfileBufferEntry.cpp b/tools/profiler/core/ProfileBufferEntry.cpp new file mode 100644 index 0000000000..5429eac0b8 --- /dev/null +++ b/tools/profiler/core/ProfileBufferEntry.cpp @@ -0,0 +1,2321 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfileBufferEntry.h" + +#include "mozilla/ProfilerMarkers.h" +#include "platform.h" +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" +#include "ProfilerBacktrace.h" +#include "ProfilerRustBindings.h" + +#include "js/ProfilingFrameIterator.h" +#include "jsapi.h" +#include "jsfriendapi.h" +#include "mozilla/Logging.h" +#include "mozilla/JSONStringWriteFuncs.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/Sprintf.h" +#include "mozilla/StackWalk.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "ProfilerCodeAddressService.h" + +#include +#include + +using namespace mozilla; +using namespace mozilla::literals::ProportionValue_literals; + +//////////////////////////////////////////////////////////////////////// +// BEGIN ProfileBufferEntry + +ProfileBufferEntry::ProfileBufferEntry() + : mKind(Kind::INVALID), mStorage{0, 0, 0, 0, 0, 0, 0, 0} {} + +// aString must be a static string. 
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString) + : mKind(aKind) { + MOZ_ASSERT(aKind == Kind::Label); + memcpy(mStorage, &aString, sizeof(aString)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars]) + : mKind(aKind) { + MOZ_ASSERT(aKind == Kind::DynamicStringFragment); + memcpy(mStorage, aChars, kNumChars); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) { + memcpy(mStorage, &aPtr, sizeof(aPtr)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble) + : mKind(aKind) { + memcpy(mStorage, &aDouble, sizeof(aDouble)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) { + memcpy(mStorage, &aInt, sizeof(aInt)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64) + : mKind(aKind) { + memcpy(mStorage, &aInt64, sizeof(aInt64)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64) + : mKind(aKind) { + memcpy(mStorage, &aUint64, sizeof(aUint64)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, ProfilerThreadId aThreadId) + : mKind(aKind) { + static_assert(std::is_trivially_copyable_v); + static_assert(sizeof(aThreadId) <= sizeof(mStorage)); + memcpy(mStorage, &aThreadId, sizeof(aThreadId)); +} + +const char* ProfileBufferEntry::GetString() const { + const char* result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +void* ProfileBufferEntry::GetPtr() const { + void* result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +double ProfileBufferEntry::GetDouble() const { + double result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +int ProfileBufferEntry::GetInt() const { + int result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +int64_t ProfileBufferEntry::GetInt64() const { + int64_t result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +uint64_t ProfileBufferEntry::GetUint64() 
const { + uint64_t result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +ProfilerThreadId ProfileBufferEntry::GetThreadId() const { + ProfilerThreadId result; + static_assert(std::is_trivially_copyable_v); + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const { + memcpy(aOutArray, mStorage, kNumChars); +} + +// END ProfileBufferEntry +//////////////////////////////////////////////////////////////////////// + +struct TypeInfo { + Maybe mKeyedBy; + Maybe mName; + Maybe mLocation; + Maybe mLineNumber; +}; + +// As mentioned in ProfileBufferEntry.h, the JSON format contains many +// arrays whose elements are laid out according to various schemas to help +// de-duplication. This RAII class helps write these arrays by keeping track of +// the last non-null element written and adding the appropriate number of null +// elements when writing new non-null elements. It also automatically opens and +// closes an array element on the given JSON writer. +// +// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter and +// the UniqueJSONStrings objects for the lifetime of AutoArraySchemaWriter. Do +// not access them independently while the AutoArraySchemaWriter is alive. +// If you need to add complex objects, call FreeFormElement(), which will give +// you temporary access to the writer. +// +// Example usage: +// +// // Define the schema of elements in this type of array: [FOO, BAR, BAZ] +// enum Schema : uint32_t { +// FOO = 0, +// BAR = 1, +// BAZ = 2 +// }; +// +// AutoArraySchemaWriter writer(someJsonWriter); +// if (shouldWriteFoo) { +// writer.IntElement(FOO, getFoo()); +// } +// ... etc ... +// +// The elements need to be added in-order. 
+class MOZ_RAII AutoArraySchemaWriter { + public: + explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter) + : mJSONWriter(aWriter), mNextFreeIndex(0) { + mJSONWriter.StartArrayElement(); + } + + ~AutoArraySchemaWriter() { mJSONWriter.EndArray(); } + + template + void IntElement(uint32_t aIndex, T aValue) { + static_assert(!std::is_same_v, + "Narrowing uint64 -> int64 conversion not allowed"); + FillUpTo(aIndex); + mJSONWriter.IntElement(static_cast(aValue)); + } + + void DoubleElement(uint32_t aIndex, double aValue) { + FillUpTo(aIndex); + mJSONWriter.DoubleElement(aValue); + } + + void TimeMsElement(uint32_t aIndex, double aTime_ms) { + FillUpTo(aIndex); + mJSONWriter.TimeDoubleMsElement(aTime_ms); + } + + void BoolElement(uint32_t aIndex, bool aValue) { + FillUpTo(aIndex); + mJSONWriter.BoolElement(aValue); + } + + protected: + SpliceableJSONWriter& Writer() { return mJSONWriter; } + + void FillUpTo(uint32_t aIndex) { + MOZ_ASSERT(aIndex >= mNextFreeIndex); + mJSONWriter.NullElements(aIndex - mNextFreeIndex); + mNextFreeIndex = aIndex + 1; + } + + private: + SpliceableJSONWriter& mJSONWriter; + uint32_t mNextFreeIndex; +}; + +// Same as AutoArraySchemaWriter, but this can also write strings (output as +// indexes into the table of unique strings). 
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter { + public: + AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter, + UniqueJSONStrings& aStrings) + : AutoArraySchemaWriter(aWriter), mStrings(aStrings) {} + + void StringElement(uint32_t aIndex, const Span& aValue) { + FillUpTo(aIndex); + mStrings.WriteElement(Writer(), aValue); + } + + private: + UniqueJSONStrings& mStrings; +}; + +Maybe UniqueStacks::BeginStack(const FrameKey& aFrame) { + if (Maybe frameIndex = GetOrAddFrameIndex(aFrame); frameIndex) { + return Some(StackKey(*frameIndex)); + } + return Nothing{}; +} + +Vector&& +JITFrameInfo::MoveRangesWithNewFailureLatch(FailureLatch& aFailureLatch) && { + aFailureLatch.SetFailureFrom(mLocalFailureLatchSource); + return std::move(mRanges); +} + +UniquePtr&& +JITFrameInfo::MoveUniqueStringsWithNewFailureLatch( + FailureLatch& aFailureLatch) && { + if (mUniqueStrings) { + mUniqueStrings->ChangeFailureLatchAndForwardState(aFailureLatch); + } else { + aFailureLatch.SetFailureFrom(mLocalFailureLatchSource); + } + return std::move(mUniqueStrings); +} + +Maybe UniqueStacks::AppendFrame( + const StackKey& aStack, const FrameKey& aFrame) { + if (Maybe stackIndex = GetOrAddStackIndex(aStack); stackIndex) { + if (Maybe frameIndex = GetOrAddFrameIndex(aFrame); frameIndex) { + return Some(StackKey(aStack, *stackIndex, *frameIndex)); + } + } + return Nothing{}; +} + +JITFrameInfoForBufferRange JITFrameInfoForBufferRange::Clone() const { + JITFrameInfoForBufferRange::JITAddressToJITFramesMap jitAddressToJITFramesMap; + MOZ_RELEASE_ASSERT( + jitAddressToJITFramesMap.reserve(mJITAddressToJITFramesMap.count())); + for (auto iter = mJITAddressToJITFramesMap.iter(); !iter.done(); + iter.next()) { + const mozilla::Vector& srcKeys = iter.get().value(); + mozilla::Vector destKeys; + MOZ_RELEASE_ASSERT(destKeys.appendAll(srcKeys)); + jitAddressToJITFramesMap.putNewInfallible(iter.get().key(), + std::move(destKeys)); + } + + 
JITFrameInfoForBufferRange::JITFrameToFrameJSONMap jitFrameToFrameJSONMap; + MOZ_RELEASE_ASSERT( + jitFrameToFrameJSONMap.reserve(mJITFrameToFrameJSONMap.count())); + for (auto iter = mJITFrameToFrameJSONMap.iter(); !iter.done(); iter.next()) { + jitFrameToFrameJSONMap.putNewInfallible(iter.get().key(), + iter.get().value()); + } + + return JITFrameInfoForBufferRange{mRangeStart, mRangeEnd, + std::move(jitAddressToJITFramesMap), + std::move(jitFrameToFrameJSONMap)}; +} + +JITFrameInfo::JITFrameInfo(const JITFrameInfo& aOther, + mozilla::ProgressLogger aProgressLogger) + : mUniqueStrings(MakeUniqueFallible( + mLocalFailureLatchSource, *aOther.mUniqueStrings, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, "Creating JIT frame info unique strings...", 49_pc, + "Created JIT frame info unique strings"))) { + if (!mUniqueStrings) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo allocating mUniqueStrings"); + return; + } + + if (mRanges.reserve(aOther.mRanges.length())) { + for (auto&& [i, progressLogger] : + aProgressLogger.CreateLoopSubLoggersFromTo(50_pc, 100_pc, + aOther.mRanges.length(), + "Copying JIT frame info")) { + mRanges.infallibleAppend(aOther.mRanges[i].Clone()); + } + } else { + mLocalFailureLatchSource.SetFailure("OOM in JITFrameInfo resizing mRanges"); + } +} + +bool UniqueStacks::FrameKey::NormalFrameData::operator==( + const NormalFrameData& aOther) const { + return mLocation == aOther.mLocation && + mRelevantForJS == aOther.mRelevantForJS && + mBaselineInterp == aOther.mBaselineInterp && + mInnerWindowID == aOther.mInnerWindowID && mLine == aOther.mLine && + mColumn == aOther.mColumn && mCategoryPair == aOther.mCategoryPair; +} + +bool UniqueStacks::FrameKey::JITFrameData::operator==( + const JITFrameData& aOther) const { + return mCanonicalAddress == aOther.mCanonicalAddress && + mDepth == aOther.mDepth && mRangeIndex == aOther.mRangeIndex; +} + +// Consume aJITFrameInfo by stealing its string table and its JIT frame info +// ranges. 
The JIT frame info contains JSON which refers to strings from the +// JIT frame info's string table, so our string table needs to have the same +// strings at the same indices. +UniqueStacks::UniqueStacks( + FailureLatch& aFailureLatch, JITFrameInfo&& aJITFrameInfo, + ProfilerCodeAddressService* aCodeAddressService /* = nullptr */) + : mUniqueStrings(std::move(aJITFrameInfo) + .MoveUniqueStringsWithNewFailureLatch(aFailureLatch)), + mCodeAddressService(aCodeAddressService), + mFrameTableWriter(aFailureLatch), + mStackTableWriter(aFailureLatch), + mJITInfoRanges(std::move(aJITFrameInfo) + .MoveRangesWithNewFailureLatch(aFailureLatch)) { + if (!mUniqueStrings) { + SetFailure("Did not get mUniqueStrings from JITFrameInfo"); + return; + } + + mFrameTableWriter.StartBareList(); + mStackTableWriter.StartBareList(); +} + +Maybe UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) { + if (Failed()) { + return Nothing{}; + } + + uint32_t count = mStackToIndexMap.count(); + auto entry = mStackToIndexMap.lookupForAdd(aStack); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return Some(entry->value()); + } + + if (!mStackToIndexMap.add(entry, aStack, count)) { + SetFailure("OOM in UniqueStacks::GetOrAddStackIndex"); + return Nothing{}; + } + StreamStack(aStack); + return Some(count); +} + +Maybe> +UniqueStacks::LookupFramesForJITAddressFromBufferPos(void* aJITAddress, + uint64_t aBufferPos) { + JITFrameInfoForBufferRange* rangeIter = + std::lower_bound(mJITInfoRanges.begin(), mJITInfoRanges.end(), aBufferPos, + [](const JITFrameInfoForBufferRange& aRange, + uint64_t aPos) { return aRange.mRangeEnd < aPos; }); + MOZ_RELEASE_ASSERT( + rangeIter != mJITInfoRanges.end() && + rangeIter->mRangeStart <= aBufferPos && + aBufferPos < rangeIter->mRangeEnd, + "Buffer position of jit address needs to be in one of the ranges"); + + using JITFrameKey = JITFrameInfoForBufferRange::JITFrameKey; + + const JITFrameInfoForBufferRange& jitFrameInfoRange = *rangeIter; + auto 
jitFrameKeys = + jitFrameInfoRange.mJITAddressToJITFramesMap.lookup(aJITAddress); + if (!jitFrameKeys) { + return Nothing(); + } + + // Map the array of JITFrameKeys to an array of FrameKeys, and ensure that + // each of the FrameKeys exists in mFrameToIndexMap. + Vector frameKeys; + MOZ_RELEASE_ASSERT(frameKeys.initCapacity(jitFrameKeys->value().length())); + for (const JITFrameKey& jitFrameKey : jitFrameKeys->value()) { + FrameKey frameKey(jitFrameKey.mCanonicalAddress, jitFrameKey.mDepth, + rangeIter - mJITInfoRanges.begin()); + uint32_t index = mFrameToIndexMap.count(); + auto entry = mFrameToIndexMap.lookupForAdd(frameKey); + if (!entry) { + // We need to add this frame to our frame table. The JSON for this frame + // already exists in jitFrameInfoRange, we just need to splice it into + // the frame table and give it an index. + auto frameJSON = + jitFrameInfoRange.mJITFrameToFrameJSONMap.lookup(jitFrameKey); + MOZ_RELEASE_ASSERT(frameJSON, "Should have cached JSON for this frame"); + mFrameTableWriter.Splice(frameJSON->value()); + MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(entry, frameKey, index)); + } + MOZ_RELEASE_ASSERT(frameKeys.append(std::move(frameKey))); + } + return Some(std::move(frameKeys)); +} + +Maybe UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) { + if (Failed()) { + return Nothing{}; + } + + uint32_t count = mFrameToIndexMap.count(); + auto entry = mFrameToIndexMap.lookupForAdd(aFrame); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return Some(entry->value()); + } + + if (!mFrameToIndexMap.add(entry, aFrame, count)) { + SetFailure("OOM in UniqueStacks::GetOrAddFrameIndex"); + return Nothing{}; + } + StreamNonJITFrame(aFrame); + return Some(count); +} + +void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) { + mFrameTableWriter.EndBareList(); + aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc()); +} + +void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) { + 
mStackTableWriter.EndBareList(); + aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc()); +} + +[[nodiscard]] nsAutoCString UniqueStacks::FunctionNameOrAddress(void* aPC) { + nsAutoCString nameOrAddress; + + if (!mCodeAddressService || + !mCodeAddressService->GetFunction(aPC, nameOrAddress) || + nameOrAddress.IsEmpty()) { + nameOrAddress.AppendASCII("0x"); + // `AppendInt` only knows `uint32_t` or `uint64_t`, but because these are + // just aliases for *two* of (`unsigned`, `unsigned long`, and `unsigned + // long long`), a call with `uintptr_t` could use the third type and + // therefore would be ambiguous. + // So we want to force using exactly `uint32_t` or `uint64_t`, whichever + // matches the size of `uintptr_t`. + // (The outer cast to `uint` should then be a no-op.) + using uint = std::conditional_t; + nameOrAddress.AppendInt(static_cast(reinterpret_cast(aPC)), + 16); + } + + return nameOrAddress; +} + +void UniqueStacks::StreamStack(const StackKey& aStack) { + enum Schema : uint32_t { PREFIX = 0, FRAME = 1 }; + + AutoArraySchemaWriter writer(mStackTableWriter); + if (aStack.mPrefixStackIndex.isSome()) { + writer.IntElement(PREFIX, *aStack.mPrefixStackIndex); + } + writer.IntElement(FRAME, aStack.mFrameIndex); +} + +void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) { + if (Failed()) { + return; + } + + using NormalFrameData = FrameKey::NormalFrameData; + + enum Schema : uint32_t { + LOCATION = 0, + RELEVANT_FOR_JS = 1, + INNER_WINDOW_ID = 2, + IMPLEMENTATION = 3, + LINE = 4, + COLUMN = 5, + CATEGORY = 6, + SUBCATEGORY = 7 + }; + + AutoArraySchemaWithStringsWriter writer(mFrameTableWriter, *mUniqueStrings); + + const NormalFrameData& data = aFrame.mData.as(); + writer.StringElement(LOCATION, data.mLocation); + writer.BoolElement(RELEVANT_FOR_JS, data.mRelevantForJS); + + // It's okay to convert uint64_t to double here because DOM always creates IDs + // that are convertible to double. 
+ writer.DoubleElement(INNER_WINDOW_ID, data.mInnerWindowID); + + // The C++ interpreter is the default implementation so we only emit element + // for Baseline Interpreter frames. + if (data.mBaselineInterp) { + writer.StringElement(IMPLEMENTATION, MakeStringSpan("blinterp")); + } + + if (data.mLine.isSome()) { + writer.IntElement(LINE, *data.mLine); + } + if (data.mColumn.isSome()) { + writer.IntElement(COLUMN, *data.mColumn); + } + if (data.mCategoryPair.isSome()) { + const JS::ProfilingCategoryPairInfo& info = + JS::GetProfilingCategoryPairInfo(*data.mCategoryPair); + writer.IntElement(CATEGORY, uint32_t(info.mCategory)); + writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex); + } +} + +static void StreamJITFrame(JSContext* aContext, SpliceableJSONWriter& aWriter, + UniqueJSONStrings& aUniqueStrings, + const JS::ProfiledFrameHandle& aJITFrame) { + enum Schema : uint32_t { + LOCATION = 0, + RELEVANT_FOR_JS = 1, + INNER_WINDOW_ID = 2, + IMPLEMENTATION = 3, + LINE = 4, + COLUMN = 5, + CATEGORY = 6, + SUBCATEGORY = 7 + }; + + AutoArraySchemaWithStringsWriter writer(aWriter, aUniqueStrings); + + writer.StringElement(LOCATION, MakeStringSpan(aJITFrame.label())); + writer.BoolElement(RELEVANT_FOR_JS, false); + + // It's okay to convert uint64_t to double here because DOM always creates IDs + // that are convertible to double. + // Realm ID is the name of innerWindowID inside JS code. + writer.DoubleElement(INNER_WINDOW_ID, aJITFrame.realmID()); + + JS::ProfilingFrameIterator::FrameKind frameKind = aJITFrame.frameKind(); + MOZ_ASSERT(frameKind == JS::ProfilingFrameIterator::Frame_Ion || + frameKind == JS::ProfilingFrameIterator::Frame_Baseline); + writer.StringElement(IMPLEMENTATION, + frameKind == JS::ProfilingFrameIterator::Frame_Ion + ? MakeStringSpan("ion") + : MakeStringSpan("baseline")); + + const JS::ProfilingCategoryPairInfo& info = JS::GetProfilingCategoryPairInfo( + frameKind == JS::ProfilingFrameIterator::Frame_Ion + ? 
JS::ProfilingCategoryPair::JS_IonMonkey + : JS::ProfilingCategoryPair::JS_Baseline); + writer.IntElement(CATEGORY, uint32_t(info.mCategory)); + writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex); +} + +static nsCString JSONForJITFrame(JSContext* aContext, + const JS::ProfiledFrameHandle& aJITFrame, + UniqueJSONStrings& aUniqueStrings) { + nsCString json; + JSONStringRefWriteFunc jw(json); + SpliceableJSONWriter writer(jw, aUniqueStrings.SourceFailureLatch()); + StreamJITFrame(aContext, writer, aUniqueStrings, aJITFrame); + return json; +} + +void JITFrameInfo::AddInfoForRange( + uint64_t aRangeStart, uint64_t aRangeEnd, JSContext* aCx, + const std::function&)>& + aJITAddressProvider) { + if (mLocalFailureLatchSource.Failed()) { + return; + } + + if (aRangeStart == aRangeEnd) { + return; + } + + MOZ_RELEASE_ASSERT(aRangeStart < aRangeEnd); + + if (!mRanges.empty()) { + const JITFrameInfoForBufferRange& prevRange = mRanges.back(); + MOZ_RELEASE_ASSERT(prevRange.mRangeEnd <= aRangeStart, + "Ranges must be non-overlapping and added in-order."); + } + + using JITFrameKey = JITFrameInfoForBufferRange::JITFrameKey; + + JITFrameInfoForBufferRange::JITAddressToJITFramesMap jitAddressToJITFrameMap; + JITFrameInfoForBufferRange::JITFrameToFrameJSONMap jitFrameToFrameJSONMap; + + aJITAddressProvider([&](void* aJITAddress) { + // Make sure that we have cached data for aJITAddress. 
+ auto addressEntry = jitAddressToJITFrameMap.lookupForAdd(aJITAddress); + if (!addressEntry) { + Vector jitFrameKeys; + for (JS::ProfiledFrameHandle handle : + JS::GetProfiledFrames(aCx, aJITAddress)) { + uint32_t depth = jitFrameKeys.length(); + JITFrameKey jitFrameKey{handle.canonicalAddress(), depth}; + auto frameEntry = jitFrameToFrameJSONMap.lookupForAdd(jitFrameKey); + if (!frameEntry) { + if (!jitFrameToFrameJSONMap.add( + frameEntry, jitFrameKey, + JSONForJITFrame(aCx, handle, *mUniqueStrings))) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding jit->frame map"); + return; + } + } + if (!jitFrameKeys.append(jitFrameKey)) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding jit frame key"); + return; + } + } + if (!jitAddressToJITFrameMap.add(addressEntry, aJITAddress, + std::move(jitFrameKeys))) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding addr->jit map"); + return; + } + } + }); + + if (!mRanges.append(JITFrameInfoForBufferRange{ + aRangeStart, aRangeEnd, std::move(jitAddressToJITFrameMap), + std::move(jitFrameToFrameJSONMap)})) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding range"); + return; + } +} + +struct ProfileSample { + uint32_t mStack = 0; + double mTime = 0.0; + Maybe mResponsiveness; + RunningTimes mRunningTimes; +}; + +// Write CPU measurements with "Delta" unit, which is some amount of work that +// happened since the previous sample. 
+static void WriteDelta(AutoArraySchemaWriter& aSchemaWriter, uint32_t aProperty, + uint64_t aDelta) { + aSchemaWriter.IntElement(aProperty, int64_t(aDelta)); +} + +static void WriteSample(SpliceableJSONWriter& aWriter, + const ProfileSample& aSample) { + enum Schema : uint32_t { + STACK = 0, + TIME = 1, + EVENT_DELAY = 2 +#define RUNNING_TIME_SCHEMA(index, name, unit, jsonProperty) , name + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SCHEMA) +#undef RUNNING_TIME_SCHEMA + }; + + AutoArraySchemaWriter writer(aWriter); + + writer.IntElement(STACK, aSample.mStack); + + writer.TimeMsElement(TIME, aSample.mTime); + + if (aSample.mResponsiveness.isSome()) { + writer.DoubleElement(EVENT_DELAY, *aSample.mResponsiveness); + } + +#define RUNNING_TIME_STREAM(index, name, unit, jsonProperty) \ + aSample.mRunningTimes.GetJson##name##unit().apply( \ + [&writer](const uint64_t& aValue) { \ + Write##unit(writer, name, aValue); \ + }); + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_STREAM) + +#undef RUNNING_TIME_STREAM +} + +static void StreamMarkerAfterKind( + ProfileBufferEntryReader& aER, + ProcessStreamingContext& aProcessStreamingContext) { + ThreadStreamingContext* threadData = nullptr; + mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream( + aER, + [&](ProfilerThreadId aThreadId) -> baseprofiler::SpliceableJSONWriter* { + threadData = + aProcessStreamingContext.GetThreadStreamingContext(aThreadId); + return threadData ? 
&threadData->mMarkersDataWriter : nullptr; + }, + [&](ProfileChunkedBuffer& aChunkedBuffer) { + ProfilerBacktrace backtrace("", &aChunkedBuffer); + MOZ_ASSERT(threadData, + "threadData should have been set before calling here"); + backtrace.StreamJSON(threadData->mMarkersDataWriter, + aProcessStreamingContext.ProcessStartTime(), + *threadData->mUniqueStacks); + }, + [&](mozilla::base_profiler_markers_detail::Streaming::DeserializerTag + aTag) { + MOZ_ASSERT(threadData, + "threadData should have been set before calling here"); + + size_t payloadSize = aER.RemainingBytes(); + + ProfileBufferEntryReader::DoubleSpanOfConstBytes spans = + aER.ReadSpans(payloadSize); + if (MOZ_LIKELY(spans.IsSingleSpan())) { + // Only a single span, we can just refer to it directly + // instead of copying it. + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, spans.mFirstOrOnly.Elements(), payloadSize, + &threadData->mMarkersDataWriter); + } else { + // Two spans, we need to concatenate them by copying. 
+ uint8_t* payloadBuffer = new uint8_t[payloadSize]; + spans.CopyBytesTo(payloadBuffer); + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, payloadBuffer, payloadSize, + &threadData->mMarkersDataWriter); + delete[] payloadBuffer; + } + }); +} + +class EntryGetter { + public: + explicit EntryGetter( + ProfileChunkedBuffer::Reader& aReader, + mozilla::FailureLatch& aFailureLatch, + mozilla::ProgressLogger aProgressLogger = {}, + uint64_t aInitialReadPos = 0, + ProcessStreamingContext* aStreamingContextForMarkers = nullptr) + : mFailureLatch(aFailureLatch), + mStreamingContextForMarkers(aStreamingContextForMarkers), + mBlockIt( + aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + aInitialReadPos))), + mBlockItEnd(aReader.end()), + mRangeStart(mBlockIt.BufferRangeStart().ConvertToProfileBufferIndex()), + mRangeSize( + double(mBlockIt.BufferRangeEnd().ConvertToProfileBufferIndex() - + mRangeStart)), + mProgressLogger(std::move(aProgressLogger)) { + SetLocalProgress(ProgressLogger::NO_LOCATION_UPDATE); + if (!ReadLegacyOrEnd()) { + // Find and read the next non-legacy entry. + Next(); + } + } + + bool Has() const { + return (!mFailureLatch.Failed()) && (mBlockIt != mBlockItEnd); + } + + const ProfileBufferEntry& Get() const { + MOZ_ASSERT(Has() || mFailureLatch.Failed(), + "Caller should have checked `Has()` before `Get()`"); + return mEntry; + } + + void Next() { + MOZ_ASSERT(Has() || mFailureLatch.Failed(), + "Caller should have checked `Has()` before `Next()`"); + ++mBlockIt; + ReadUntilLegacyOrEnd(); + } + + // Hand off the current iterator to the caller, which may be used to read + // any kind of entries (legacy or modern). + ProfileChunkedBuffer::BlockIterator Iterator() const { return mBlockIt; } + + // After `Iterator()` was used, we can restart from *after* its updated + // position. 
+ void RestartAfter(const ProfileChunkedBuffer::BlockIterator& it) { + mBlockIt = it; + if (!Has()) { + return; + } + Next(); + } + + ProfileBufferBlockIndex CurBlockIndex() const { + return mBlockIt.CurrentBlockIndex(); + } + + uint64_t CurPos() const { + return CurBlockIndex().ConvertToProfileBufferIndex(); + } + + void SetLocalProgress(const char* aLocation) { + mProgressLogger.SetLocalProgress( + ProportionValue{double(CurBlockIndex().ConvertToProfileBufferIndex() - + mRangeStart) / + mRangeSize}, + aLocation); + } + + private: + // Try to read the entry at the current `mBlockIt` position. + // * If we're at the end of the buffer, just return `true`. + // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`), + // read it into `mEntry`, and return `true` as well. + // * Otherwise the entry contains a "modern" type that cannot be read into + // `mEntry`, return `false` (so `EntryGetter` can skip to another entry). + bool ReadLegacyOrEnd() { + if (!Has()) { + return true; + } + // Read the entry "kind", which is always at the start of all entries. + ProfileBufferEntryReader er = *mBlockIt; + auto type = static_cast( + er.ReadObject()); + MOZ_ASSERT(static_cast(type) < + static_cast( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) { + if (type == ProfileBufferEntry::Kind::Marker && + mStreamingContextForMarkers) { + StreamMarkerAfterKind(er, *mStreamingContextForMarkers); + if (!Has()) { + return true; + } + SetLocalProgress("Processed marker"); + } + er.SetRemainingBytes(0); + return false; + } + // Here, we have a legacy item, we need to read it from the start. + // Because the above `ReadObject` moved the reader, we need to reset it to + // the start of the entry before reading the whole entry. 
+ er = *mBlockIt; + er.ReadBytes(&mEntry, er.RemainingBytes()); + return true; + } + + void ReadUntilLegacyOrEnd() { + for (;;) { + if (ReadLegacyOrEnd()) { + // Either we're at the end, or we could read a legacy entry -> Done. + break; + } + // Otherwise loop around until we hit a legacy entry or the end. + ++mBlockIt; + } + SetLocalProgress(ProgressLogger::NO_LOCATION_UPDATE); + } + + mozilla::FailureLatch& mFailureLatch; + + ProcessStreamingContext* const mStreamingContextForMarkers; + + ProfileBufferEntry mEntry; + ProfileChunkedBuffer::BlockIterator mBlockIt; + const ProfileChunkedBuffer::BlockIterator mBlockItEnd; + + // Progress logger, and the data needed to compute the current relative + // position in the buffer. + const mozilla::ProfileBufferIndex mRangeStart; + const double mRangeSize; + mozilla::ProgressLogger mProgressLogger; +}; + +// The following grammar shows legal sequences of profile buffer entries. +// The sequences beginning with a ThreadId entry are known as "samples". +// +// ( +// ( /* Samples */ +// ThreadId +// TimeBeforeCompactStack +// RunningTimes? +// UnresponsiveDurationMs? +// CompactStack +// /* internally including: +// ( NativeLeafAddr +// | Label FrameFlags? DynamicStringFragment* +// InnerWindowID? LineNumber? ColumnNumber? CategoryPair? +// | JitReturnAddr +// )+ +// */ +// ) +// | ( /* Reference to a previous identical sample */ +// ThreadId +// TimeBeforeSameSample +// RunningTimes? +// SameSample +// ) +// | Marker +// | ( /* Counters */ +// CounterId +// Time +// ( +// CounterKey +// Count +// Number? +// )* +// ) +// | CollectionStart +// | CollectionEnd +// | Pause +// | Resume +// | ( ProfilerOverheadTime /* Sampling start timestamp */ +// ProfilerOverheadDuration /* Lock acquisition */ +// ProfilerOverheadDuration /* Expired markers cleaning */ +// ProfilerOverheadDuration /* Counters */ +// ProfilerOverheadDuration /* Threads */ +// ) +// )* +// +// The most complicated part is the stack entry sequence that begins with +// Label. 
Here are some examples. +// +// - ProfilingStack frames without a dynamic string: +// +// Label("js::RunScript") +// CategoryPair(JS::ProfilingCategoryPair::JS) +// +// Label("XREMain::XRE_main") +// LineNumber(4660) +// CategoryPair(JS::ProfilingCategoryPair::OTHER) +// +// Label("ElementRestyler::ComputeStyleChangeFor") +// LineNumber(3003) +// CategoryPair(JS::ProfilingCategoryPair::CSS) +// +// - ProfilingStack frames with a dynamic string: +// +// Label("nsObserverService::NotifyObservers") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("domwindo") +// DynamicStringFragment("wopened") +// LineNumber(291) +// CategoryPair(JS::ProfilingCategoryPair::OTHER) +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME)) +// DynamicStringFragment("closeWin") +// DynamicStringFragment("dow (chr") +// DynamicStringFragment("ome://gl") +// DynamicStringFragment("obal/con") +// DynamicStringFragment("tent/glo") +// DynamicStringFragment("balOverl") +// DynamicStringFragment("ay.js:5)") +// DynamicStringFragment("") # this string holds the closing '\0' +// LineNumber(25) +// CategoryPair(JS::ProfilingCategoryPair::JS) +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME)) +// DynamicStringFragment("bound (s") +// DynamicStringFragment("elf-host") +// DynamicStringFragment("ed:914)") +// LineNumber(945) +// CategoryPair(JS::ProfilingCategoryPair::JS) +// +// - A profiling stack frame with an overly long dynamic string: +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("(too lon") +// DynamicStringFragment("g)") +// LineNumber(100) +// CategoryPair(JS::ProfilingCategoryPair::NETWORK) +// +// - A wasm JIT frame: +// +// Label("") +// FrameFlags(uint64_t(0)) +// DynamicStringFragment("wasm-fun") +// DynamicStringFragment("ction[87") +// DynamicStringFragment("36] (blo") +// DynamicStringFragment("b:http:/") +// 
DynamicStringFragment("/webasse") +// DynamicStringFragment("mbly.org") +// DynamicStringFragment("/3dc5759") +// DynamicStringFragment("4-ce58-4") +// DynamicStringFragment("626-975b") +// DynamicStringFragment("-08ad116") +// DynamicStringFragment("30bc1:38") +// DynamicStringFragment("29856)") +// +// - A JS frame in a synchronous sample: +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("u (https") +// DynamicStringFragment("://perf-") +// DynamicStringFragment("html.io/") +// DynamicStringFragment("ac0da204") +// DynamicStringFragment("aaa44d75") +// DynamicStringFragment("a800.bun") +// DynamicStringFragment("dle.js:2") +// DynamicStringFragment("5)") + +// Because this is a format entirely internal to the Profiler, any parsing +// error indicates a bug in the ProfileBuffer writing or the parser itself, +// or possibly flaky hardware. +#define ERROR_AND_CONTINUE(msg) \ + { \ + fprintf(stderr, "ProfileBuffer parse error: %s", msg); \ + MOZ_ASSERT(false, msg); \ + continue; \ + } + +struct StreamingParametersForThread { + SpliceableJSONWriter& mWriter; + UniqueStacks& mUniqueStacks; + ThreadStreamingContext::PreviousStackState& mPreviousStackState; + uint32_t& mPreviousStack; + + StreamingParametersForThread( + SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks, + ThreadStreamingContext::PreviousStackState& aPreviousStackState, + uint32_t& aPreviousStack) + : mWriter(aWriter), + mUniqueStacks(aUniqueStacks), + mPreviousStackState(aPreviousStackState), + mPreviousStack(aPreviousStack) {} +}; + +// GetStreamingParametersForThreadCallback: +// (ProfilerThreadId) -> Maybe +template +ProfilerThreadId ProfileBuffer::DoStreamSamplesAndMarkersToJSON( + mozilla::FailureLatch& aFailureLatch, + GetStreamingParametersForThreadCallback&& + aGetStreamingParametersForThreadCallback, + double aSinceTime, ProcessStreamingContext* aStreamingContextForMarkers, + mozilla::ProgressLogger aProgressLogger) const { 
+ UniquePtr dynStrBuf = MakeUnique(kMaxFrameKeyLength); + + return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + ProfilerThreadId processedThreadId; + + EntryGetter e(*aReader, aFailureLatch, std::move(aProgressLogger), + /* aInitialReadPos */ 0, aStreamingContextForMarkers); + + for (;;) { + // This block skips entries until we find the start of the next sample. + // This is useful in three situations. + // + // - The circular buffer overwrites old entries, so when we start parsing + // we might be in the middle of a sample, and we must skip forward to + // the start of the next sample. + // + // - We skip samples that don't have an appropriate ThreadId or Time. + // + // - We skip range Pause, Resume, CollectionStart, Marker, Counter + // and CollectionEnd entries between samples. + while (e.Has()) { + if (e.Get().IsThreadId()) { + break; + } + e.Next(); + } + + if (!e.Has()) { + break; + } + + // Due to the skip_to_next_sample block above, if we have an entry here it + // must be a ThreadId entry. + MOZ_ASSERT(e.Get().IsThreadId()); + + ProfilerThreadId threadId = e.Get().GetThreadId(); + e.Next(); + + Maybe streamingParameters = + std::forward( + aGetStreamingParametersForThreadCallback)(threadId); + + // Ignore samples that are for the wrong thread. 
+ if (!streamingParameters) { + continue; + } + + SpliceableJSONWriter& writer = streamingParameters->mWriter; + UniqueStacks& uniqueStacks = streamingParameters->mUniqueStacks; + ThreadStreamingContext::PreviousStackState& previousStackState = + streamingParameters->mPreviousStackState; + uint32_t& previousStack = streamingParameters->mPreviousStack; + + auto ReadStack = [&](EntryGetter& e, double time, uint64_t entryPosition, + const Maybe& unresponsiveDuration, + const RunningTimes& runningTimes) { + if (writer.Failed()) { + return; + } + + Maybe maybeStack = + uniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)")); + if (!maybeStack) { + writer.SetFailure("BeginStack failure"); + return; + } + + UniqueStacks::StackKey stack = *maybeStack; + + int numFrames = 0; + while (e.Has()) { + if (e.Get().IsNativeLeafAddr()) { + numFrames++; + + void* pc = e.Get().GetPtr(); + e.Next(); + + nsAutoCString functionNameOrAddress = + uniqueStacks.FunctionNameOrAddress(pc); + + maybeStack = uniqueStacks.AppendFrame( + stack, UniqueStacks::FrameKey(functionNameOrAddress.get())); + if (!maybeStack) { + writer.SetFailure("AppendFrame failure"); + return; + } + stack = *maybeStack; + + } else if (e.Get().IsLabel()) { + numFrames++; + + const char* label = e.Get().GetString(); + e.Next(); + + using FrameFlags = js::ProfilingStackFrame::Flags; + uint32_t frameFlags = 0; + if (e.Has() && e.Get().IsFrameFlags()) { + frameFlags = uint32_t(e.Get().GetUint64()); + e.Next(); + } + + bool relevantForJS = + frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS); + + bool isBaselineInterp = + frameFlags & uint32_t(FrameFlags::IS_BLINTERP_FRAME); + + // Copy potential dynamic string fragments into dynStrBuf, so that + // dynStrBuf will then contain the entire dynamic string. 
+ size_t i = 0; + dynStrBuf[0] = '\0'; + while (e.Has()) { + if (e.Get().IsDynamicStringFragment()) { + char chars[ProfileBufferEntry::kNumChars]; + e.Get().CopyCharsInto(chars); + for (char c : chars) { + if (i < kMaxFrameKeyLength) { + dynStrBuf[i] = c; + i++; + } + } + e.Next(); + } else { + break; + } + } + dynStrBuf[kMaxFrameKeyLength - 1] = '\0'; + bool hasDynamicString = (i != 0); + + nsAutoCStringN<1024> frameLabel; + if (label[0] != '\0' && hasDynamicString) { + if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) { + frameLabel.AppendPrintf("%s.%s", label, dynStrBuf.get()); + } else if (frameFlags & + uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) { + frameLabel.AppendPrintf("get %s.%s", label, dynStrBuf.get()); + } else if (frameFlags & + uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) { + frameLabel.AppendPrintf("set %s.%s", label, dynStrBuf.get()); + } else { + frameLabel.AppendPrintf("%s %s", label, dynStrBuf.get()); + } + } else if (hasDynamicString) { + frameLabel.Append(dynStrBuf.get()); + } else { + frameLabel.Append(label); + } + + uint64_t innerWindowID = 0; + if (e.Has() && e.Get().IsInnerWindowID()) { + innerWindowID = uint64_t(e.Get().GetUint64()); + e.Next(); + } + + Maybe line; + if (e.Has() && e.Get().IsLineNumber()) { + line = Some(unsigned(e.Get().GetInt())); + e.Next(); + } + + Maybe column; + if (e.Has() && e.Get().IsColumnNumber()) { + column = Some(unsigned(e.Get().GetInt())); + e.Next(); + } + + Maybe categoryPair; + if (e.Has() && e.Get().IsCategoryPair()) { + categoryPair = + Some(JS::ProfilingCategoryPair(uint32_t(e.Get().GetInt()))); + e.Next(); + } + + maybeStack = uniqueStacks.AppendFrame( + stack, + UniqueStacks::FrameKey(std::move(frameLabel), relevantForJS, + isBaselineInterp, innerWindowID, line, + column, categoryPair)); + if (!maybeStack) { + writer.SetFailure("AppendFrame failure"); + return; + } + stack = *maybeStack; + + } else if (e.Get().IsJitReturnAddr()) { + numFrames++; + + // A JIT frame may expand to 
multiple frames due to inlining. + void* pc = e.Get().GetPtr(); + const Maybe>& frameKeys = + uniqueStacks.LookupFramesForJITAddressFromBufferPos( + pc, entryPosition ? entryPosition : e.CurPos()); + MOZ_RELEASE_ASSERT( + frameKeys, + "Attempting to stream samples for a buffer range " + "for which we don't have JITFrameInfo?"); + for (const UniqueStacks::FrameKey& frameKey : *frameKeys) { + maybeStack = uniqueStacks.AppendFrame(stack, frameKey); + if (!maybeStack) { + writer.SetFailure("AppendFrame failure"); + return; + } + stack = *maybeStack; + } + + e.Next(); + + } else { + break; + } + } + + // Even if this stack is considered empty, it contains the root frame, + // which needs to be in the JSON output because following "same samples" + // may refer to it when reusing this sample.mStack. + const Maybe stackIndex = + uniqueStacks.GetOrAddStackIndex(stack); + if (!stackIndex) { + writer.SetFailure("Can't add unique string for stack"); + return; + } + + // And store that possibly-empty stack in case it's followed by "same + // sample" entries. + previousStack = *stackIndex; + previousStackState = (numFrames == 0) + ? ThreadStreamingContext::eStackWasEmpty + : ThreadStreamingContext::eStackWasNotEmpty; + + // Even if too old or empty, we did process a sample for this thread id. + processedThreadId = threadId; + + // Discard samples that are too old. + if (time < aSinceTime) { + return; + } + + if (numFrames == 0 && runningTimes.IsEmpty()) { + // It is possible to have empty stacks if native stackwalking is + // disabled. Skip samples with empty stacks, unless we have useful + // running times. + return; + } + + WriteSample(writer, ProfileSample{*stackIndex, time, + unresponsiveDuration, runningTimes}); + }; // End of `ReadStack(EntryGetter&)` lambda. 
+ + if (e.Has() && e.Get().IsTime()) { + double time = e.Get().GetDouble(); + e.Next(); + // Note: Even if this sample is too old (before aSinceTime), we still + // need to read it, so that its frames are in the tables, in case there + // is a same-sample following it that would be after aSinceTime, which + // would need these frames to be present. + + ReadStack(e, time, 0, Nothing{}, RunningTimes{}); + + e.SetLocalProgress("Processed sample"); + } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) { + double time = e.Get().GetDouble(); + // Note: Even if this sample is too old (before aSinceTime), we still + // need to read it, so that its frames are in the tables, in case there + // is a same-sample following it that would be after aSinceTime, which + // would need these frames to be present. + + RunningTimes runningTimes; + Maybe unresponsiveDuration; + + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + ProfileBufferEntry::Kind kind = + er.ReadObject(); + + // There may be running times before the CompactStack. + if (kind == ProfileBufferEntry::Kind::RunningTimes) { + er.ReadIntoObject(runningTimes); + continue; + } + + // There may be an UnresponsiveDurationMs before the CompactStack. + if (kind == ProfileBufferEntry::Kind::UnresponsiveDurationMs) { + unresponsiveDuration = Some(er.ReadObject()); + continue; + } + + if (kind == ProfileBufferEntry::Kind::CompactStack) { + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + WorkerChunkManager()); + er.ReadIntoObject(tempBuffer); + tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "Local ProfileChunkedBuffer cannot be out-of-session"); + // This is a compact stack, it should only contain one sample. 
+ EntryGetter stackEntryGetter(*aReader, aFailureLatch); + ReadStack(stackEntryGetter, time, + it.CurrentBlockIndex().ConvertToProfileBufferIndex(), + unresponsiveDuration, runningTimes); + }); + WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); + break; + } + + if (kind == ProfileBufferEntry::Kind::Marker && + aStreamingContextForMarkers) { + StreamMarkerAfterKind(er, *aStreamingContextForMarkers); + continue; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeCompactStack and CompactStack"); + er.SetRemainingBytes(0); + } + + e.RestartAfter(it); + + e.SetLocalProgress("Processed compact sample"); + } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) { + if (previousStackState == ThreadStreamingContext::eNoStackYet) { + // We don't have any full sample yet, we cannot duplicate a "previous" + // one. This should only happen at most once per thread, for the very + // first sample. + continue; + } + + ProfileSample sample; + + // Keep the same `mStack` as previously output. + // Note that it may be empty, this is checked below before writing it. + sample.mStack = previousStack; + + sample.mTime = e.Get().GetDouble(); + + // Ignore samples that are too old. + if (sample.mTime < aSinceTime) { + e.Next(); + continue; + } + + sample.mResponsiveness = Nothing{}; + + sample.mRunningTimes.Clear(); + + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + ProfileBufferEntry::Kind kind = + er.ReadObject(); + + // There may be running times before the SameSample. 
+ if (kind == ProfileBufferEntry::Kind::RunningTimes) { + er.ReadIntoObject(sample.mRunningTimes); + continue; + } + + if (kind == ProfileBufferEntry::Kind::SameSample) { + if (previousStackState == ThreadStreamingContext::eStackWasEmpty && + sample.mRunningTimes.IsEmpty()) { + // Skip samples with empty stacks, unless we have useful running + // times. + break; + } + WriteSample(writer, sample); + break; + } + + if (kind == ProfileBufferEntry::Kind::Marker && + aStreamingContextForMarkers) { + StreamMarkerAfterKind(er, *aStreamingContextForMarkers); + continue; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeSameSample and SameSample"); + er.SetRemainingBytes(0); + } + + e.RestartAfter(it); + + e.SetLocalProgress("Processed repeated sample"); + } else { + ERROR_AND_CONTINUE("expected a Time entry"); + } + } + + return processedThreadId; + }); +} + +ProfilerThreadId ProfileBuffer::StreamSamplesToJSON( + SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const { + ThreadStreamingContext::PreviousStackState previousStackState = + ThreadStreamingContext::eNoStackYet; + uint32_t stack = 0u; +#ifdef DEBUG + int processedCount = 0; +#endif // DEBUG + return DoStreamSamplesAndMarkersToJSON( + aWriter.SourceFailureLatch(), + [&](ProfilerThreadId aReadThreadId) { + Maybe streamingParameters; +#ifdef DEBUG + ++processedCount; + MOZ_ASSERT( + aThreadId.IsSpecified() || + (processedCount == 1 && aReadThreadId.IsSpecified()), + "Unspecified aThreadId should only be used with 1-sample buffer"); +#endif // DEBUG + if (!aThreadId.IsSpecified() || aThreadId == aReadThreadId) { + streamingParameters.emplace(aWriter, aUniqueStacks, + previousStackState, stack); + } + return streamingParameters; + }, + aSinceTime, /* aStreamingContextForMarkers */ nullptr, + std::move(aProgressLogger)); +} + +void 
ProfileBuffer::StreamSamplesAndMarkersToJSON( + ProcessStreamingContext& aProcessStreamingContext, + mozilla::ProgressLogger aProgressLogger) const { + (void)DoStreamSamplesAndMarkersToJSON( + aProcessStreamingContext.SourceFailureLatch(), + [&](ProfilerThreadId aReadThreadId) { + Maybe streamingParameters; + ThreadStreamingContext* threadData = + aProcessStreamingContext.GetThreadStreamingContext(aReadThreadId); + if (threadData) { + streamingParameters.emplace( + threadData->mSamplesDataWriter, *threadData->mUniqueStacks, + threadData->mPreviousStackState, threadData->mPreviousStack); + } + return streamingParameters; + }, + aProcessStreamingContext.GetSinceTime(), &aProcessStreamingContext, + std::move(aProgressLogger)); +} + +void ProfileBuffer::AddJITInfoForRange( + uint64_t aRangeStart, ProfilerThreadId aThreadId, JSContext* aContext, + JITFrameInfo& aJITFrameInfo, + mozilla::ProgressLogger aProgressLogger) const { + // We can only process JitReturnAddr entries if we have a JSContext. + MOZ_RELEASE_ASSERT(aContext); + + aRangeStart = std::max(aRangeStart, BufferRangeStart()); + aJITFrameInfo.AddInfoForRange( + aRangeStart, BufferRangeEnd(), aContext, + [&](const std::function& aJITAddressConsumer) { + // Find all JitReturnAddr entries in the given range for the given + // thread, and call aJITAddressConsumer with those addresses. + + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when " + "sampler is running"); + + EntryGetter e(*aReader, aJITFrameInfo.LocalFailureLatchSource(), + std::move(aProgressLogger), aRangeStart); + + while (true) { + // Advance to the next ThreadId entry. + while (e.Has() && !e.Get().IsThreadId()) { + e.Next(); + } + if (!e.Has()) { + break; + } + + MOZ_ASSERT(e.Get().IsThreadId()); + ProfilerThreadId threadId = e.Get().GetThreadId(); + e.Next(); + + // Ignore samples that are for a different thread. 
+ if (threadId != aThreadId) { + continue; + } + + if (e.Has() && e.Get().IsTime()) { + // Legacy stack. + e.Next(); + while (e.Has() && !e.Get().IsThreadId()) { + if (e.Get().IsJitReturnAddr()) { + aJITAddressConsumer(e.Get().GetPtr()); + } + e.Next(); + } + } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) { + // Compact stack. + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + ProfileBufferEntry::Kind kind = + er.ReadObject(); + if (kind == ProfileBufferEntry::Kind::CompactStack) { + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + WorkerChunkManager()); + er.ReadIntoObject(tempBuffer); + tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT( + aReader, + "Local ProfileChunkedBuffer cannot be out-of-session"); + EntryGetter stackEntryGetter( + *aReader, aJITFrameInfo.LocalFailureLatchSource()); + while (stackEntryGetter.Has()) { + if (stackEntryGetter.Get().IsJitReturnAddr()) { + aJITAddressConsumer(stackEntryGetter.Get().GetPtr()); + } + stackEntryGetter.Next(); + } + }); + WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); + break; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeCompactStack and CompactStack"); + er.SetRemainingBytes(0); + } + + e.Next(); + } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) { + // Sample index, nothing to do. 
+ + } else { + ERROR_AND_CONTINUE("expected a Time entry"); + } + } + }); + }); +} + +void ProfileBuffer::StreamMarkersToJSON( + SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId, + const TimeStamp& aProcessStartTime, double aSinceTime, + UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const { + mEntries.ReadEach([&](ProfileBufferEntryReader& aER) { + auto type = static_cast( + aER.ReadObject()); + MOZ_ASSERT(static_cast(type) < + static_cast( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (type == ProfileBufferEntry::Kind::Marker) { + mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream( + aER, + [&](const ProfilerThreadId& aMarkerThreadId) { + return (!aThreadId.IsSpecified() || aMarkerThreadId == aThreadId) + ? &aWriter + : nullptr; + }, + [&](ProfileChunkedBuffer& aChunkedBuffer) { + ProfilerBacktrace backtrace("", &aChunkedBuffer); + backtrace.StreamJSON(aWriter, aProcessStartTime, aUniqueStacks); + }, + [&](mozilla::base_profiler_markers_detail::Streaming::DeserializerTag + aTag) { + size_t payloadSize = aER.RemainingBytes(); + + ProfileBufferEntryReader::DoubleSpanOfConstBytes spans = + aER.ReadSpans(payloadSize); + if (MOZ_LIKELY(spans.IsSingleSpan())) { + // Only a single span, we can just refer to it directly + // instead of copying it. + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, spans.mFirstOrOnly.Elements(), payloadSize, &aWriter); + } else { + // Two spans, we need to concatenate them by copying. + uint8_t* payloadBuffer = new uint8_t[payloadSize]; + spans.CopyBytesTo(payloadBuffer); + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, payloadBuffer, payloadSize, &aWriter); + delete[] payloadBuffer; + } + }); + } else { + // The entry was not a marker, we need to skip to the end. 
+ aER.SetRemainingBytes(0); + } + }); +} + +void ProfileBuffer::StreamProfilerOverheadToJSON( + SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime, + double aSinceTime, mozilla::ProgressLogger aProgressLogger) const { + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, aWriter.SourceFailureLatch(), + std::move(aProgressLogger)); + + enum Schema : uint32_t { + TIME = 0, + LOCKING = 1, + MARKER_CLEANING = 2, + COUNTERS = 3, + THREADS = 4 + }; + + aWriter.StartObjectProperty("profilerOverhead"); + aWriter.StartObjectProperty("samples"); + // Stream all sampling overhead data. We skip other entries, because we + // process them in StreamSamplesToJSON()/etc. + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("time"); + schema.WriteField("locking"); + schema.WriteField("expiredMarkerCleaning"); + schema.WriteField("counters"); + schema.WriteField("threads"); + } + + aWriter.StartArrayProperty("data"); + double firstTime = 0.0; + double lastTime = 0.0; + ProfilerStats intervals, overheads, lockings, cleanings, counters, threads; + while (e.Has()) { + // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4 + if (e.Get().IsProfilerOverheadTime()) { + double time = e.Get().GetDouble(); + if (time >= aSinceTime) { + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime"); + } + double locking = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration"); + } + double cleaning = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry 
after " + "ProfilerOverheadTime,ProfilerOverheadDuration*2"); + } + double counter = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration*3"); + } + double thread = e.Get().GetDouble(); + + if (firstTime == 0.0) { + firstTime = time; + } else { + // Note that we'll have 1 fewer interval than other numbers (because + // we need both ends of an interval to know its duration). The final + // difference should be insignificant over the expected many + // thousands of iterations. + intervals.Count(time - lastTime); + } + lastTime = time; + overheads.Count(locking + cleaning + counter + thread); + lockings.Count(locking); + cleanings.Count(cleaning); + counters.Count(counter); + threads.Count(thread); + + AutoArraySchemaWriter writer(aWriter); + writer.TimeMsElement(TIME, time); + writer.DoubleElement(LOCKING, locking); + writer.DoubleElement(MARKER_CLEANING, cleaning); + writer.DoubleElement(COUNTERS, counter); + writer.DoubleElement(THREADS, thread); + } + } + e.Next(); + } + aWriter.EndArray(); // data + aWriter.EndObject(); // samples + + // Only output statistics if there is at least one full interval (and + // therefore at least two samplings.) 
+ if (intervals.n > 0) { + aWriter.StartObjectProperty("statistics"); + aWriter.DoubleProperty("profiledDuration", lastTime - firstTime); + aWriter.IntProperty("samplingCount", overheads.n); + aWriter.DoubleProperty("overheadDurations", overheads.sum); + aWriter.DoubleProperty("overheadPercentage", + overheads.sum / (lastTime - firstTime)); +#define PROFILER_STATS(name, var) \ + aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \ + aWriter.DoubleProperty("min" name, (var).min); \ + aWriter.DoubleProperty("max" name, (var).max); + PROFILER_STATS("Interval", intervals); + PROFILER_STATS("Overhead", overheads); + PROFILER_STATS("Lockings", lockings); + PROFILER_STATS("Cleaning", cleanings); + PROFILER_STATS("Counter", counters); + PROFILER_STATS("Thread", threads); +#undef PROFILER_STATS + aWriter.EndObject(); // statistics + } + aWriter.EndObject(); // profilerOverhead + }); +} + +struct CounterKeyedSample { + double mTime; + uint64_t mNumber; + int64_t mCount; +}; + +using CounterKeyedSamples = Vector; + +static LazyLogModule sFuzzyfoxLog("Fuzzyfox"); + +using CounterMap = HashMap; + +// HashMap lookup, if not found, a default value is inserted. +// Returns reference to (existing or new) value inside the HashMap. +template +static auto& LookupOrAdd(HashM& aMap, Key&& aKey) { + auto addPtr = aMap.lookupForAdd(aKey); + if (!addPtr) { + MOZ_RELEASE_ASSERT(aMap.add(addPtr, std::forward(aKey), + typename HashM::Entry::ValueType{})); + MOZ_ASSERT(!!addPtr); + } + return addPtr->value(); +} + +void ProfileBuffer::StreamCountersToJSON( + SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime, + double aSinceTime, mozilla::ProgressLogger aProgressLogger) const { + // Because this is a format entirely internal to the Profiler, any parsing + // error indicates a bug in the ProfileBuffer writing or the parser itself, + // or possibly flaky hardware. 
+ + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, aWriter.SourceFailureLatch(), + std::move(aProgressLogger)); + + enum Schema : uint32_t { TIME = 0, COUNT = 1, NUMBER = 2 }; + + // Stream all counters. We skip other entries, because we process them in + // StreamSamplesToJSON()/etc. + // + // Valid sequence in the buffer: + // CounterID + // Time + // ( CounterKey Count Number? )* + // + // And the JSON (example): + // "counters": { + // "name": "malloc", + // "category": "Memory", + // "description": "Amount of allocated memory", + // "sample_groups": { + // "id": 0, + // "samples": { + // "schema": {"time": 0, "number": 1, "count": 2}, + // "data": [ + // [ + // 16117.033968000002, + // 2446216, + // 6801320 + // ], + // [ + // 16118.037638, + // 2446216, + // 6801320 + // ], + // ], + // } + // } + // }, + + // Build the map of counters and populate it + HashMap counters; + + while (e.Has()) { + // skip all non-Counters, including if we start in the middle of a counter + if (e.Get().IsCounterId()) { + void* id = e.Get().GetPtr(); + CounterMap& counter = LookupOrAdd(counters, id); + e.Next(); + if (!e.Has() || !e.Get().IsTime()) { + ERROR_AND_CONTINUE("expected a Time entry"); + } + double time = e.Get().GetDouble(); + e.Next(); + if (time >= aSinceTime) { + while (e.Has() && e.Get().IsCounterKey()) { + uint64_t key = e.Get().GetUint64(); + CounterKeyedSamples& data = LookupOrAdd(counter, key); + e.Next(); + if (!e.Has() || !e.Get().IsCount()) { + ERROR_AND_CONTINUE("expected a Count entry"); + } + int64_t count = e.Get().GetUint64(); + e.Next(); + uint64_t number; + if (!e.Has() || !e.Get().IsNumber()) { + number = 0; + } else { + number = e.Get().GetInt64(); + e.Next(); + } + CounterKeyedSample sample = {time, number, count}; + MOZ_RELEASE_ASSERT(data.append(sample)); + } + } else { + // skip counter sample - only need 
to skip the initial counter + // id, then let the loop at the top skip the rest + } + } else { + e.Next(); + } + } + // we have a map of a map of counter entries; dump them to JSON + if (counters.count() == 0) { + return; + } + + aWriter.StartArrayProperty("counters"); + for (auto iter = counters.iter(); !iter.done(); iter.next()) { + CounterMap& counter = iter.get().value(); + const BaseProfilerCount* base_counter = + static_cast(iter.get().key()); + + aWriter.Start(); + aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel)); + aWriter.StringProperty("category", + MakeStringSpan(base_counter->mCategory)); + aWriter.StringProperty("description", + MakeStringSpan(base_counter->mDescription)); + + aWriter.StartArrayProperty("sample_groups"); + for (auto counter_iter = counter.iter(); !counter_iter.done(); + counter_iter.next()) { + CounterKeyedSamples& samples = counter_iter.get().value(); + uint64_t key = counter_iter.get().key(); + + size_t size = samples.length(); + if (size == 0) { + continue; + } + + bool hasNumber = false; + for (size_t i = 0; i < size; i++) { + if (samples[i].mNumber != 0) { + hasNumber = true; + break; + } + } + + aWriter.StartObjectElement(); + { + aWriter.IntProperty("id", static_cast(key)); + aWriter.StartObjectProperty("samples"); + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("time"); + schema.WriteField("count"); + if (hasNumber) { + schema.WriteField("number"); + } + } + + aWriter.StartArrayProperty("data"); + double previousSkippedTime = 0.0; + uint64_t previousNumber = 0; + int64_t previousCount = 0; + for (size_t i = 0; i < size; i++) { + // Encode as deltas, and only encode if different than the previous + // or next sample; Always write the first and last samples. + if (i == 0 || i == size - 1 || + samples[i].mNumber != previousNumber || + samples[i].mCount != previousCount || + // Ensure we ouput the first 0 before skipping samples. 
+ (i >= 2 && (samples[i - 2].mNumber != previousNumber || + samples[i - 2].mCount != previousCount))) { + if (i != 0 && samples[i].mTime >= samples[i - 1].mTime) { + MOZ_LOG(sFuzzyfoxLog, mozilla::LogLevel::Error, + ("Fuzzyfox Profiler Assertion: %f >= %f", + samples[i].mTime, samples[i - 1].mTime)); + } + MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime); + MOZ_ASSERT(samples[i].mNumber >= previousNumber); + MOZ_ASSERT(samples[i].mNumber - previousNumber <= + uint64_t(std::numeric_limits::max())); + + int64_t numberDelta = + static_cast(samples[i].mNumber - previousNumber); + int64_t countDelta = samples[i].mCount - previousCount; + + if (previousSkippedTime != 0.0 && + (numberDelta != 0 || countDelta != 0)) { + // Write the last skipped sample, unless the new one is all + // zeroes (that'd be redundant) This is useful to know when a + // certain value was last sampled, so that the front-end graph + // will be more correct. + AutoArraySchemaWriter writer(aWriter); + writer.TimeMsElement(TIME, previousSkippedTime); + // The deltas are effectively zeroes, since no change happened + // between the last actually-written sample and the last skipped + // one. 
+ writer.IntElement(COUNT, 0); + if (hasNumber) { + writer.IntElement(NUMBER, 0); + } + } + + AutoArraySchemaWriter writer(aWriter); + writer.TimeMsElement(TIME, samples[i].mTime); + writer.IntElement(COUNT, countDelta); + if (hasNumber) { + writer.IntElement(NUMBER, numberDelta); + } + + previousSkippedTime = 0.0; + previousNumber = samples[i].mNumber; + previousCount = samples[i].mCount; + } else { + previousSkippedTime = samples[i].mTime; + } + } + aWriter.EndArray(); // data + aWriter.EndObject(); // samples + } + aWriter.EndObject(); // sample_groups item + } + aWriter.EndArray(); // sample groups + aWriter.End(); // for each counter + } + aWriter.EndArray(); // counters + }); +} + +#undef ERROR_AND_CONTINUE + +static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason, + const Maybe& aStartTime, + const Maybe& aEndTime) { + aWriter.Start(); + if (aStartTime) { + aWriter.TimeDoubleMsProperty("startTime", *aStartTime); + } else { + aWriter.NullProperty("startTime"); + } + if (aEndTime) { + aWriter.TimeDoubleMsProperty("endTime", *aEndTime); + } else { + aWriter.NullProperty("endTime"); + } + aWriter.StringProperty("reason", MakeStringSpan(aReason)); + aWriter.End(); +} + +void ProfileBuffer::StreamPausedRangesToJSON( + SpliceableJSONWriter& aWriter, double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const { + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, aWriter.SourceFailureLatch(), + aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "Streaming pauses...", 99_pc, "Streamed pauses")); + + Maybe currentPauseStartTime; + Maybe currentCollectionStartTime; + + while (e.Has()) { + if (e.Get().IsPause()) { + currentPauseStartTime = Some(e.Get().GetDouble()); + } else if (e.Get().IsResume()) { + AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime, + Some(e.Get().GetDouble())); + 
currentPauseStartTime = Nothing(); + } else if (e.Get().IsCollectionStart()) { + currentCollectionStartTime = Some(e.Get().GetDouble()); + } else if (e.Get().IsCollectionEnd()) { + AddPausedRange(aWriter, "collecting", currentCollectionStartTime, + Some(e.Get().GetDouble())); + currentCollectionStartTime = Nothing(); + } + e.Next(); + } + + if (currentPauseStartTime) { + AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime, + Nothing()); + } + if (currentCollectionStartTime) { + AddPausedRange(aWriter, "collecting", currentCollectionStartTime, + Nothing()); + } + }); +} + +bool ProfileBuffer::DuplicateLastSample(ProfilerThreadId aThreadId, + double aSampleTimeMs, + Maybe& aLastSample, + const RunningTimes& aRunningTimes) { + if (!aLastSample) { + return false; + } + + if (mEntries.IsIndexInCurrentChunk(ProfileBufferIndex{*aLastSample})) { + // The last (fully-written) sample is in this chunk, we can refer to it. + + // Note that between now and when we write the SameSample below, another + // chunk could have been started, so the SameSample will in fact refer to a + // block in a previous chunk. This is okay, because: + // - When serializing to JSON, if that chunk is still there, we'll still be + // able to find that old stack, so nothing will be lost. + // - If unfortunately that chunk has been destroyed, we will lose this + // sample. But this will only happen to the first sample (per thread) in + // in the whole JSON output, because the next time we're here to duplicate + // the same sample again, IsIndexInCurrentChunk will say `false` and we + // will fall back to the normal copy or even re-sample. Losing the first + // sample out of many in a whole recording is acceptable. + // + // |---| = chunk, S = Sample, D = Duplicate, s = same sample + // |---S-s-s--| |s-D--s--s-| |s-D--s---s| + // Later, the first chunk is destroyed/recycled: + // |s-D--s--s-| |s-D--s---s| |-... + // Output: ^ ^ ^ ^ + // `-|--|-------|--- Same but no previous -> lost. 
+ // `--|-------|--- Full duplicate sample. + // `-------|--- Same with previous -> okay. + // `--- Same but now we have a previous -> okay! + + AUTO_PROFILER_STATS(DuplicateLastSample_SameSample); + + // Add the thread id first. We don't update `aLastSample` because we are not + // writing a full sample. + (void)AddThreadIdEntry(aThreadId); + + // Copy the new time, to be followed by a SameSample. + AddEntry(ProfileBufferEntry::TimeBeforeSameSample(aSampleTimeMs)); + + // Add running times if they have data. + if (!aRunningTimes.IsEmpty()) { + mEntries.PutObjects(ProfileBufferEntry::Kind::RunningTimes, + aRunningTimes); + } + + // Finish with a SameSample entry. + mEntries.PutObjects(ProfileBufferEntry::Kind::SameSample); + + return true; + } + + AUTO_PROFILER_STATS(DuplicateLastSample_copy); + + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, WorkerChunkManager()); + + auto retrieveWorkerChunk = MakeScopeExit( + [&]() { WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); }); + + const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + // DuplicateLastSample is only called during profiling, so we don't need a + // progress logger (only useful when capturing the final profile). + EntryGetter e(*aReader, mozilla::FailureLatchInfallibleSource::Singleton(), + ProgressLogger{}, *aLastSample); + + if (e.CurPos() != *aLastSample) { + // The last sample is no longer within the buffer range, so we cannot + // use it. Reset the stored buffer position to Nothing(). + aLastSample.reset(); + return false; + } + + MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() && + e.Get().GetThreadId() == aThreadId); + + e.Next(); + + // Go through the whole entry and duplicate it, until we find the next + // one. 
+ while (e.Has()) { + switch (e.Get().GetKind()) { + case ProfileBufferEntry::Kind::Pause: + case ProfileBufferEntry::Kind::Resume: + case ProfileBufferEntry::Kind::PauseSampling: + case ProfileBufferEntry::Kind::ResumeSampling: + case ProfileBufferEntry::Kind::CollectionStart: + case ProfileBufferEntry::Kind::CollectionEnd: + case ProfileBufferEntry::Kind::ThreadId: + case ProfileBufferEntry::Kind::TimeBeforeSameSample: + // We're done. + return true; + case ProfileBufferEntry::Kind::Time: + // Copy with new time + AddEntry(tempBuffer, ProfileBufferEntry::Time(aSampleTimeMs)); + break; + case ProfileBufferEntry::Kind::TimeBeforeCompactStack: { + // Copy with new time, followed by a compact stack. + AddEntry(tempBuffer, + ProfileBufferEntry::TimeBeforeCompactStack(aSampleTimeMs)); + + // Add running times if they have data. + if (!aRunningTimes.IsEmpty()) { + tempBuffer.PutObjects(ProfileBufferEntry::Kind::RunningTimes, + aRunningTimes); + } + + // The `CompactStack` *must* be present afterwards, but may not + // immediately follow `TimeBeforeCompactStack` (e.g., some markers + // could be written in-between), so we need to look for it in the + // following entries. + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + auto kind = static_cast( + er.ReadObject()); + MOZ_ASSERT( + static_cast(kind) < + static_cast( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (kind == ProfileBufferEntry::Kind::CompactStack) { + // Found our CompactStack, just make a copy of the whole entry. + er = *it; + auto bytes = er.RemainingBytes(); + MOZ_ASSERT(bytes < + ProfileBufferChunkManager::scExpectedMaximumStackSize); + tempBuffer.Put(bytes, [&](Maybe& aEW) { + MOZ_ASSERT(aEW.isSome(), "tempBuffer cannot be out-of-session"); + aEW->WriteFromReader(er, bytes); + }); + // CompactStack marks the end, we're done. 
+ break; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeCompactStack and CompactStack"); + er.SetRemainingBytes(0); + // Here, we have encountered a non-legacy entry that was not the + // CompactStack we're looking for; just continue the search... + } + // We're done. + return true; + } + case ProfileBufferEntry::Kind::CounterKey: + case ProfileBufferEntry::Kind::Number: + case ProfileBufferEntry::Kind::Count: + // Don't copy anything not part of a thread's stack sample + break; + case ProfileBufferEntry::Kind::CounterId: + // CounterId is normally followed by Time - if so, we'd like + // to skip it. If we duplicate Time, it won't hurt anything, just + // waste buffer space (and this can happen if the CounterId has + // fallen off the end of the buffer, but Time (and Number/Count) + // are still in the buffer). + e.Next(); + if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) { + // this would only happen if there was an invalid sequence + // in the buffer. Don't skip it. + continue; + } + // we've skipped Time + break; + case ProfileBufferEntry::Kind::ProfilerOverheadTime: + // ProfilerOverheadTime is normally followed by + // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't + // duplicate, as we are in the middle of a sampling and will soon + // capture its own overhead. + e.Next(); + // A missing Time would only happen if there was an invalid + // sequence in the buffer. Don't skip unexpected entry. 
+ if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + // we've skipped ProfilerOverheadTime and + // ProfilerOverheadDuration*4. + break; + default: { + // Copy anything else we don't know about. + AddEntry(tempBuffer, e.Get()); + break; + } + } + e.Next(); + } + return true; + }); + + if (!ok) { + return false; + } + + // If the buffer was big enough, there won't be any cleared blocks. + if (tempBuffer.GetState().mClearedBlockCount != 0) { + // No need to try to read stack again as it won't fit. Reset the stored + // buffer position to Nothing(). + aLastSample.reset(); + return false; + } + + aLastSample = Some(AddThreadIdEntry(aThreadId)); + + mEntries.AppendContents(tempBuffer); + + return true; +} + +void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) { + // This function does nothing! + // The duration limit will be removed from Firefox, see bug 1632365. + Unused << aTime; +} + +// END ProfileBuffer +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/ProfileBufferEntry.h b/tools/profiler/core/ProfileBufferEntry.h new file mode 100644 index 0000000000..bfee4923a3 --- /dev/null +++ b/tools/profiler/core/ProfileBufferEntry.h @@ -0,0 +1,532 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfileBufferEntry_h +#define ProfileBufferEntry_h + +#include +#include +#include +#include +#include +#include "gtest/MozGtestFriend.h" +#include "js/ProfilingCategory.h" +#include "mozilla/Attributes.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/Maybe.h" +#include "mozilla/ProfileBufferEntryKinds.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProfilerUtils.h" +#include "mozilla/UniquePtrExtensions.h" +#include "mozilla/Variant.h" +#include "mozilla/Vector.h" +#include "nsString.h" + +class ProfilerCodeAddressService; +struct JSContext; + +class ProfileBufferEntry { + public: + using KindUnderlyingType = + std::underlying_type_t<::mozilla::ProfileBufferEntryKind>; + using Kind = mozilla::ProfileBufferEntryKind; + + ProfileBufferEntry(); + + static constexpr size_t kNumChars = mozilla::ProfileBufferEntryNumChars; + + private: + // aString must be a static string. + ProfileBufferEntry(Kind aKind, const char* aString); + ProfileBufferEntry(Kind aKind, char aChars[kNumChars]); + ProfileBufferEntry(Kind aKind, void* aPtr); + ProfileBufferEntry(Kind aKind, double aDouble); + ProfileBufferEntry(Kind aKind, int64_t aInt64); + ProfileBufferEntry(Kind aKind, uint64_t aUint64); + ProfileBufferEntry(Kind aKind, int aInt); + ProfileBufferEntry(Kind aKind, ProfilerThreadId aThreadId); + + public: +#define CTOR(KIND, TYPE, SIZE) \ + static ProfileBufferEntry KIND(TYPE aVal) { \ + return ProfileBufferEntry(Kind::KIND, aVal); \ + } + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR) +#undef CTOR + + Kind GetKind() const { return mKind; } + +#define IS_KIND(KIND, TYPE, SIZE) \ + bool Is##KIND() const { return mKind == Kind::KIND; } + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND) +#undef IS_KIND + + private: + FRIEND_TEST(ThreadProfile, InsertOneEntry); + FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap); + FRIEND_TEST(ThreadProfile, InsertEntriesWrap); + 
FRIEND_TEST(ThreadProfile, MemoryMeasure); + friend class ProfileBuffer; + + Kind mKind; + uint8_t mStorage[kNumChars]; + + const char* GetString() const; + void* GetPtr() const; + double GetDouble() const; + int GetInt() const; + int64_t GetInt64() const; + uint64_t GetUint64() const; + ProfilerThreadId GetThreadId() const; + void CopyCharsInto(char (&aOutArray)[kNumChars]) const; +}; + +// Packed layout: 1 byte for the tag + 8 bytes for the value. +static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size"); + +// Contains all the information about JIT frames that is needed to stream stack +// frames for JitReturnAddr entries in the profiler buffer. +// Every return address (void*) is mapped to one or more JITFrameKeys, and +// every JITFrameKey is mapped to a JSON string for that frame. +// mRangeStart and mRangeEnd describe the range in the buffer for which this +// mapping is valid. Only JitReturnAddr entries within that buffer range can be +// processed using this JITFrameInfoForBufferRange object. +struct JITFrameInfoForBufferRange final { + JITFrameInfoForBufferRange Clone() const; + + uint64_t mRangeStart; + uint64_t mRangeEnd; // mRangeEnd marks the first invalid index. 
+ + struct JITFrameKey { + bool operator==(const JITFrameKey& aOther) const { + return mCanonicalAddress == aOther.mCanonicalAddress && + mDepth == aOther.mDepth; + } + bool operator!=(const JITFrameKey& aOther) const { + return !(*this == aOther); + } + + void* mCanonicalAddress; + uint32_t mDepth; + }; + struct JITFrameKeyHasher { + using Lookup = JITFrameKey; + + static mozilla::HashNumber hash(const JITFrameKey& aLookup) { + mozilla::HashNumber hash = 0; + hash = mozilla::AddToHash(hash, aLookup.mCanonicalAddress); + hash = mozilla::AddToHash(hash, aLookup.mDepth); + return hash; + } + + static bool match(const JITFrameKey& aKey, const JITFrameKey& aLookup) { + return aKey == aLookup; + } + + static void rekey(JITFrameKey& aKey, const JITFrameKey& aNewKey) { + aKey = aNewKey; + } + }; + + using JITAddressToJITFramesMap = + mozilla::HashMap>; + JITAddressToJITFramesMap mJITAddressToJITFramesMap; + using JITFrameToFrameJSONMap = + mozilla::HashMap; + JITFrameToFrameJSONMap mJITFrameToFrameJSONMap; +}; + +// Contains JITFrameInfoForBufferRange objects for multiple profiler buffer +// ranges. +class JITFrameInfo final { + public: + JITFrameInfo() + : mUniqueStrings(mozilla::MakeUniqueFallible( + mLocalFailureLatchSource)) { + if (!mUniqueStrings) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo allocating mUniqueStrings"); + } + } + + MOZ_IMPLICIT JITFrameInfo(const JITFrameInfo& aOther, + mozilla::ProgressLogger aProgressLogger); + + // Creates a new JITFrameInfoForBufferRange object in mRanges by looking up + // information about the provided JIT return addresses using aCx. + // Addresses are provided like this: + // The caller of AddInfoForRange supplies a function in aJITAddressProvider. + // This function will be called once, synchronously, with an + // aJITAddressConsumer argument, which is a function that needs to be called + // for every address. That function can be called multiple times for the same + // address. 
+ void AddInfoForRange( + uint64_t aRangeStart, uint64_t aRangeEnd, JSContext* aCx, + const std::function&)>& + aJITAddressProvider); + + // Returns whether the information stored in this object is still relevant + // for any entries in the buffer. + bool HasExpired(uint64_t aCurrentBufferRangeStart) const { + if (mRanges.empty()) { + // No information means no relevant information. Allow this object to be + // discarded. + return true; + } + return mRanges.back().mRangeEnd <= aCurrentBufferRangeStart; + } + + mozilla::FailureLatch& LocalFailureLatchSource() { + return mLocalFailureLatchSource; + } + + // The encapsulated data points at the local FailureLatch, so on the way out + // they must be given a new external FailureLatch to start using instead. + mozilla::Vector&& MoveRangesWithNewFailureLatch( + mozilla::FailureLatch& aFailureLatch) &&; + mozilla::UniquePtr&& MoveUniqueStringsWithNewFailureLatch( + mozilla::FailureLatch& aFailureLatch) &&; + + private: + // JITFrameInfo's may exist during profiling, so it carries its own fallible + // FailureLatch. If&when the data below is finally extracted, any error is + // forwarded to the caller. + mozilla::FailureLatchSource mLocalFailureLatchSource; + + // The array of ranges of JIT frame information, sorted by buffer position. + // Ranges are non-overlapping. + // The JSON of the cached frames can contain string indexes, which refer + // to strings in mUniqueStrings. + mozilla::Vector mRanges; + + // The string table which contains strings used in the frame JSON that's + // cached in mRanges. 
+ mozilla::UniquePtr mUniqueStrings; +}; + +class UniqueStacks final : public mozilla::FailureLatch { + public: + struct FrameKey { + explicit FrameKey(const char* aLocation) + : mData(NormalFrameData{nsCString(aLocation), false, false, 0, + mozilla::Nothing(), mozilla::Nothing()}) {} + + FrameKey(nsCString&& aLocation, bool aRelevantForJS, bool aBaselineInterp, + uint64_t aInnerWindowID, const mozilla::Maybe& aLine, + const mozilla::Maybe& aColumn, + const mozilla::Maybe& aCategoryPair) + : mData(NormalFrameData{aLocation, aRelevantForJS, aBaselineInterp, + aInnerWindowID, aLine, aColumn, + aCategoryPair}) {} + + FrameKey(void* aJITAddress, uint32_t aJITDepth, uint32_t aRangeIndex) + : mData(JITFrameData{aJITAddress, aJITDepth, aRangeIndex}) {} + + FrameKey(const FrameKey& aToCopy) = default; + + uint32_t Hash() const; + bool operator==(const FrameKey& aOther) const { + return mData == aOther.mData; + } + + struct NormalFrameData { + bool operator==(const NormalFrameData& aOther) const; + + nsCString mLocation; + bool mRelevantForJS; + bool mBaselineInterp; + uint64_t mInnerWindowID; + mozilla::Maybe mLine; + mozilla::Maybe mColumn; + mozilla::Maybe mCategoryPair; + }; + struct JITFrameData { + bool operator==(const JITFrameData& aOther) const; + + void* mCanonicalAddress; + uint32_t mDepth; + uint32_t mRangeIndex; + }; + mozilla::Variant mData; + }; + + struct FrameKeyHasher { + using Lookup = FrameKey; + + static mozilla::HashNumber hash(const FrameKey& aLookup) { + mozilla::HashNumber hash = 0; + if (aLookup.mData.is()) { + const FrameKey::NormalFrameData& data = + aLookup.mData.as(); + if (!data.mLocation.IsEmpty()) { + hash = mozilla::AddToHash(hash, + mozilla::HashString(data.mLocation.get())); + } + hash = mozilla::AddToHash(hash, data.mRelevantForJS); + hash = mozilla::AddToHash(hash, data.mBaselineInterp); + hash = mozilla::AddToHash(hash, data.mInnerWindowID); + if (data.mLine.isSome()) { + hash = mozilla::AddToHash(hash, *data.mLine); + } + if 
(data.mColumn.isSome()) { + hash = mozilla::AddToHash(hash, *data.mColumn); + } + if (data.mCategoryPair.isSome()) { + hash = mozilla::AddToHash(hash, + static_cast(*data.mCategoryPair)); + } + } else { + const FrameKey::JITFrameData& data = + aLookup.mData.as(); + hash = mozilla::AddToHash(hash, data.mCanonicalAddress); + hash = mozilla::AddToHash(hash, data.mDepth); + hash = mozilla::AddToHash(hash, data.mRangeIndex); + } + return hash; + } + + static bool match(const FrameKey& aKey, const FrameKey& aLookup) { + return aKey == aLookup; + } + + static void rekey(FrameKey& aKey, const FrameKey& aNewKey) { + aKey = aNewKey; + } + }; + + struct StackKey { + mozilla::Maybe mPrefixStackIndex; + uint32_t mFrameIndex; + + explicit StackKey(uint32_t aFrame) + : mFrameIndex(aFrame), mHash(mozilla::HashGeneric(aFrame)) {} + + StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex, + uint32_t aFrame) + : mPrefixStackIndex(mozilla::Some(aPrefixStackIndex)), + mFrameIndex(aFrame), + mHash(mozilla::AddToHash(aPrefix.mHash, aFrame)) {} + + mozilla::HashNumber Hash() const { return mHash; } + + bool operator==(const StackKey& aOther) const { + return mPrefixStackIndex == aOther.mPrefixStackIndex && + mFrameIndex == aOther.mFrameIndex; + } + + private: + mozilla::HashNumber mHash; + }; + + struct StackKeyHasher { + using Lookup = StackKey; + + static mozilla::HashNumber hash(const StackKey& aLookup) { + return aLookup.Hash(); + } + + static bool match(const StackKey& aKey, const StackKey& aLookup) { + return aKey == aLookup; + } + + static void rekey(StackKey& aKey, const StackKey& aNewKey) { + aKey = aNewKey; + } + }; + + UniqueStacks(mozilla::FailureLatch& aFailureLatch, + JITFrameInfo&& aJITFrameInfo, + ProfilerCodeAddressService* aCodeAddressService = nullptr); + + // Return a StackKey for aFrame as the stack's root frame (no prefix). 
+ [[nodiscard]] mozilla::Maybe BeginStack(const FrameKey& aFrame); + + // Return a new StackKey that is obtained by appending aFrame to aStack. + [[nodiscard]] mozilla::Maybe AppendFrame(const StackKey& aStack, + const FrameKey& aFrame); + + // Look up frame keys for the given JIT address, and ensure that our frame + // table has entries for the returned frame keys. The JSON for these frames + // is taken from mJITInfoRanges. + // aBufferPosition is needed in order to look up the correct JIT frame info + // object in mJITInfoRanges. + [[nodiscard]] mozilla::Maybe> + LookupFramesForJITAddressFromBufferPos(void* aJITAddress, + uint64_t aBufferPosition); + + [[nodiscard]] mozilla::Maybe GetOrAddFrameIndex( + const FrameKey& aFrame); + [[nodiscard]] mozilla::Maybe GetOrAddStackIndex( + const StackKey& aStack); + + void SpliceFrameTableElements(SpliceableJSONWriter& aWriter); + void SpliceStackTableElements(SpliceableJSONWriter& aWriter); + + [[nodiscard]] UniqueJSONStrings& UniqueStrings() { + MOZ_RELEASE_ASSERT(mUniqueStrings.get()); + return *mUniqueStrings; + } + + // Find the function name at the given PC (if a ProfilerCodeAddressService was + // provided), otherwise just stringify that PC. + [[nodiscard]] nsAutoCString FunctionNameOrAddress(void* aPC); + + FAILURELATCH_IMPL_PROXY(mFrameTableWriter) + + private: + void StreamNonJITFrame(const FrameKey& aFrame); + void StreamStack(const StackKey& aStack); + + mozilla::UniquePtr mUniqueStrings; + + ProfilerCodeAddressService* mCodeAddressService = nullptr; + + SpliceableChunkedJSONWriter mFrameTableWriter; + mozilla::HashMap mFrameToIndexMap; + + SpliceableChunkedJSONWriter mStackTableWriter; + mozilla::HashMap mStackToIndexMap; + + mozilla::Vector mJITInfoRanges; +}; + +// +// Thread profile JSON Format +// -------------------------- +// +// The profile contains much duplicate information. 
The output JSON of the +// profile attempts to deduplicate strings, frames, and stack prefixes, to cut +// down on size and to increase JSON streaming speed. Deduplicated values are +// streamed as indices into their respective tables. +// +// Further, arrays of objects with the same set of properties (e.g., samples, +// frames) are output as arrays according to a schema instead of an object +// with property names. A property that is not present is represented in the +// array as null or undefined. +// +// The format of the thread profile JSON is shown by the following example +// with 1 sample and 1 marker: +// +// { +// "name": "Foo", +// "tid": 42, +// "samples": +// { +// "schema": +// { +// "stack": 0, /* index into stackTable */ +// "time": 1, /* number */ +// "eventDelay": 2, /* number */ +// "ThreadCPUDelta": 3, /* optional number */ +// }, +// "data": +// [ +// [ 1, 0.0, 0.0 ] /* { stack: 1, time: 0.0, eventDelay: 0.0 } */ +// ] +// }, +// +// "markers": +// { +// "schema": +// { +// "name": 0, /* index into stringTable */ +// "startTime": 1, "endTime": 2, "phase": 3, "category": 4, +// "data": 5 /* arbitrary JSON */ +// }, +// "data": +// [ +// [ 3, 0.1 ] /* { name: 'example marker', startTime: 0.1 } */ +// ] +// }, +// +// "stackTable": +// { +// "schema": +// { +// "prefix": 0, /* index into stackTable */ +// "frame": 1 /* index into frameTable */ +// }, +// "data": +// [ +// [ null, 0 ], /* (root) */ +// [ 0, 1 ] /* (root) > foo.js */ +// ] +// }, +// +// "frameTable": +// { +// "schema": +// { +// "location": 0, /* index into stringTable */ +// "relevantForJS": 1, /* bool */ +// "innerWindowID": 2, /* inner window ID of global JS `window` object */ +// "implementation": 3, /* index into stringTable */ +// "line": 4, /* number */ +// "column": 5, /* number */ +// "category": 6, /* index into profile.meta.categories */ +// "subcategory": 7 /* index into +// profile.meta.categories[category].subcategories */ +// }, +// "data": +// [ +// [ 0 ], /* { location: '(root)' } */ +// [ 1, null, 
null, 2 ] /* { location: 'foo.js', +// implementation: 'baseline' } */ +// ] +// }, +// +// "stringTable": +// [ +// "(root)", +// "foo.js", +// "baseline", +// "example marker" +// ] +// } +// +// Process: +// { +// "name": "Bar", +// "pid": 24, +// "threads": +// [ +// <0-N threads from above> +// ], +// "counters": /* includes the memory counter */ +// [ +// { +// "name": "qwerty", +// "category": "uiop", +// "description": "this is qwerty uiop", +// "sample_groups": +// [ +// { +// "id": 42, /* number (thread id, or object identifier (tab), etc) */ +// "samples": +// { +// "schema": +// { +// "time": 1, /* number */ +// "number": 2, /* number (of times the counter was touched) */ +// "count": 3 /* number (total for the counter) */ +// }, +// "data": +// [ +// [ 0.1, 1824, +// 454622 ] /* { time: 0.1, number: 1824, count: 454622 } */ +// ] +// }, +// }, +// /* more sample-group objects with different id's */ +// ] +// }, +// /* more counters */ +// ], +// } +// +#endif /* ndef ProfileBufferEntry_h */ diff --git a/tools/profiler/core/ProfiledThreadData.cpp b/tools/profiler/core/ProfiledThreadData.cpp new file mode 100644 index 0000000000..febda0d85b --- /dev/null +++ b/tools/profiler/core/ProfiledThreadData.cpp @@ -0,0 +1,455 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfiledThreadData.h" + +#include "platform.h" +#include "ProfileBuffer.h" + +#include "mozilla/OriginAttributes.h" +#include "mozilla/Span.h" +#include "nsXULAppAPI.h" + +#if defined(GP_OS_darwin) +# include +#endif + +using namespace mozilla::literals::ProportionValue_literals; + +ProfiledThreadData::ProfiledThreadData( + const mozilla::profiler::ThreadRegistrationInfo& aThreadInfo) + : mThreadInfo(aThreadInfo.Name(), aThreadInfo.ThreadId(), + aThreadInfo.IsMainThread(), aThreadInfo.RegisterTime()) { + MOZ_COUNT_CTOR(ProfiledThreadData); +} + +ProfiledThreadData::ProfiledThreadData( + mozilla::profiler::ThreadRegistrationInfo&& aThreadInfo) + : mThreadInfo(std::move(aThreadInfo)) { + MOZ_COUNT_CTOR(ProfiledThreadData); +} + +ProfiledThreadData::~ProfiledThreadData() { + MOZ_COUNT_DTOR(ProfiledThreadData); +} + +static void StreamTables(UniqueStacks&& aUniqueStacks, JSContext* aCx, + SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, + mozilla::ProgressLogger aProgressLogger) { + aWriter.StartObjectProperty("stackTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("prefix"); + schema.WriteField("frame"); + } + + aWriter.StartArrayProperty("data"); + { + aProgressLogger.SetLocalProgress(1_pc, "Splicing stack table..."); + aUniqueStacks.SpliceStackTableElements(aWriter); + aProgressLogger.SetLocalProgress(30_pc, "Spliced stack table"); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("frameTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("location"); + schema.WriteField("relevantForJS"); + schema.WriteField("innerWindowID"); + schema.WriteField("implementation"); + schema.WriteField("line"); + schema.WriteField("column"); + schema.WriteField("category"); + schema.WriteField("subcategory"); + } + + aWriter.StartArrayProperty("data"); + { + aProgressLogger.SetLocalProgress(30_pc, "Splicing frame table..."); + 
aUniqueStacks.SpliceFrameTableElements(aWriter); + aProgressLogger.SetLocalProgress(60_pc, "Spliced frame table"); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartArrayProperty("stringTable"); + { + aProgressLogger.SetLocalProgress(60_pc, "Splicing string table..."); + std::move(aUniqueStacks.UniqueStrings()).SpliceStringTableElements(aWriter); + aProgressLogger.SetLocalProgress(90_pc, "Spliced string table"); + } + aWriter.EndArray(); +} + +mozilla::NotNull> +ProfiledThreadData::PrepareUniqueStacks( + const ProfileBuffer& aBuffer, JSContext* aCx, + mozilla::FailureLatch& aFailureLatch, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + if (mJITFrameInfoForPreviousJSContexts && + mJITFrameInfoForPreviousJSContexts->HasExpired( + aBuffer.BufferRangeStart())) { + mJITFrameInfoForPreviousJSContexts = nullptr; + } + aProgressLogger.SetLocalProgress(1_pc, "Checked JIT frame info presence"); + + // If we have an existing JITFrameInfo in mJITFrameInfoForPreviousJSContexts, + // copy the data from it. + JITFrameInfo jitFrameInfo = + mJITFrameInfoForPreviousJSContexts + ? 
JITFrameInfo(*mJITFrameInfoForPreviousJSContexts, + aProgressLogger.CreateSubLoggerTo( + "Retrieving JIT frame info...", 10_pc, + "Retrieved JIT frame info")) + : JITFrameInfo(); + + if (aCx && mBufferPositionWhenReceivedJSContext) { + aBuffer.AddJITInfoForRange( + *mBufferPositionWhenReceivedJSContext, mThreadInfo.ThreadId(), aCx, + jitFrameInfo, + aProgressLogger.CreateSubLoggerTo("Adding JIT info...", 90_pc, + "Added JIT info")); + } else { + aProgressLogger.SetLocalProgress(90_pc, "No JIT info"); + } + + return mozilla::MakeNotNull>( + aFailureLatch, std::move(jitFrameInfo), aService); +} + +void ProfiledThreadData::StreamJSON( + const ProfileBuffer& aBuffer, JSContext* aCx, SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, double aSinceTime, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + mozilla::NotNull> uniqueStacks = + PrepareUniqueStacks(aBuffer, aCx, aWriter.SourceFailureLatch(), aService, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, "Preparing unique stacks...", 10_pc, + "Prepared Unique stacks")); + + aWriter.SetUniqueStrings(uniqueStacks->UniqueStrings()); + + aWriter.Start(); + { + StreamSamplesAndMarkers( + mThreadInfo.Name(), mThreadInfo.ThreadId(), aBuffer, aWriter, + aProcessName, aETLDplus1, aProcessStartTime, mThreadInfo.RegisterTime(), + mUnregisterTime, aSinceTime, *uniqueStacks, + aProgressLogger.CreateSubLoggerTo( + 90_pc, + "ProfiledThreadData::StreamJSON: Streamed samples and markers")); + + StreamTables(std::move(*uniqueStacks), aCx, aWriter, aProcessStartTime, + aProgressLogger.CreateSubLoggerTo( + 99_pc, "Streamed tables and trace logger")); + } + aWriter.End(); + + aWriter.ResetUniqueStrings(); +} + +void ProfiledThreadData::StreamJSON( + ThreadStreamingContext&& aThreadStreamingContext, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, const 
mozilla::TimeStamp& aProcessStartTime, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + aWriter.Start(); + { + StreamSamplesAndMarkers( + mThreadInfo.Name(), aThreadStreamingContext, aWriter, aProcessName, + aETLDplus1, aProcessStartTime, mThreadInfo.RegisterTime(), + mUnregisterTime, + aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "ProfiledThreadData::StreamJSON(context): Streaming...", + 90_pc, + "ProfiledThreadData::StreamJSON(context): Streamed samples and " + "markers")); + + StreamTables( + std::move(*aThreadStreamingContext.mUniqueStacks), + aThreadStreamingContext.mJSContext, aWriter, aProcessStartTime, + aProgressLogger.CreateSubLoggerTo( + "ProfiledThreadData::StreamJSON(context): Streaming tables...", + 99_pc, "ProfiledThreadData::StreamJSON(context): Streamed tables")); + } + aWriter.End(); +} + +// StreamSamplesDataCallback: (ProgressLogger) -> ProfilerThreadId +// StreamMarkersDataCallback: (ProgressLogger) -> void +// Returns the ProfilerThreadId returned by StreamSamplesDataCallback, which +// should be the thread id of the last sample that was processed (if any; +// otherwise it is left unspecified). This is mostly useful when the caller +// doesn't know where the sample comes from, e.g., when it's a backtrace in a +// marker. 
+template +ProfilerThreadId DoStreamSamplesAndMarkers( + const char* aName, SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, + mozilla::ProgressLogger aProgressLogger, + StreamSamplesDataCallback&& aStreamSamplesDataCallback, + StreamMarkersDataCallback&& aStreamMarkersDataCallback) { + ProfilerThreadId processedThreadId; + + aWriter.StringProperty("processType", + mozilla::MakeStringSpan(XRE_GetProcessTypeString())); + + aWriter.StringProperty("name", mozilla::MakeStringSpan(aName)); + + // Use given process name (if any), unless we're the parent process. + if (XRE_IsParentProcess()) { + aWriter.StringProperty("processName", "Parent Process"); + } else if (!aProcessName.IsEmpty()) { + aWriter.StringProperty("processName", aProcessName); + } + if (!aETLDplus1.IsEmpty()) { + nsAutoCString originNoSuffix; + mozilla::OriginAttributes attrs; + if (!attrs.PopulateFromOrigin(aETLDplus1, originNoSuffix)) { + aWriter.StringProperty("eTLD+1", aETLDplus1); + } else { + aWriter.StringProperty("eTLD+1", originNoSuffix); + aWriter.BoolProperty("isPrivateBrowsing", attrs.mPrivateBrowsingId > 0); + aWriter.IntProperty("userContextId", attrs.mUserContextId); + } + } + + if (aRegisterTime) { + aWriter.DoubleProperty( + "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds()); + } else { + aWriter.NullProperty("registerTime"); + } + + if (aUnregisterTime) { + aWriter.DoubleProperty( + "unregisterTime", + (aUnregisterTime - aProcessStartTime).ToMilliseconds()); + } else { + aWriter.NullProperty("unregisterTime"); + } + + aWriter.StartObjectProperty("samples"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("stack"); + schema.WriteField("time"); + schema.WriteField("eventDelay"); +#define RUNNING_TIME_FIELD(index, name, unit, jsonProperty) \ + schema.WriteField(#jsonProperty); 
+ PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_FIELD) +#undef RUNNING_TIME_FIELD + } + + aWriter.StartArrayProperty("data"); + { + processedThreadId = std::forward( + aStreamSamplesDataCallback)(aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "Streaming samples...", 49_pc, "Streamed samples")); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("markers"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("name"); + schema.WriteField("startTime"); + schema.WriteField("endTime"); + schema.WriteField("phase"); + schema.WriteField("category"); + schema.WriteField("data"); + } + + aWriter.StartArrayProperty("data"); + { + std::forward(aStreamMarkersDataCallback)( + aProgressLogger.CreateSubLoggerFromTo(50_pc, "Streaming markers...", + 99_pc, "Streamed markers")); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + // Tech note: If `ToNumber()` returns a uint64_t, the conversion to int64_t is + // "implementation-defined" before C++20. This is acceptable here, because + // this is a one-way conversion to a unique identifier that's used to visually + // separate data by thread on the front-end. 
+ aWriter.IntProperty( + "pid", static_cast(profiler_current_process_id().ToNumber())); + aWriter.IntProperty("tid", + static_cast(processedThreadId.ToNumber())); + + return processedThreadId; +} + +ProfilerThreadId StreamSamplesAndMarkers( + const char* aName, ProfilerThreadId aThreadId, const ProfileBuffer& aBuffer, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, double aSinceTime, + UniqueStacks& aUniqueStacks, mozilla::ProgressLogger aProgressLogger) { + return DoStreamSamplesAndMarkers( + aName, aWriter, aProcessName, aETLDplus1, aProcessStartTime, + aRegisterTime, aUnregisterTime, std::move(aProgressLogger), + [&](mozilla::ProgressLogger aSubProgressLogger) { + ProfilerThreadId processedThreadId = aBuffer.StreamSamplesToJSON( + aWriter, aThreadId, aSinceTime, aUniqueStacks, + std::move(aSubProgressLogger)); + return aThreadId.IsSpecified() ? 
aThreadId : processedThreadId; + }, + [&](mozilla::ProgressLogger aSubProgressLogger) { + aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime, + aSinceTime, aUniqueStacks, + std::move(aSubProgressLogger)); + }); +} + +void StreamSamplesAndMarkers(const char* aName, + ThreadStreamingContext& aThreadData, + SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, + mozilla::ProgressLogger aProgressLogger) { + (void)DoStreamSamplesAndMarkers( + aName, aWriter, aProcessName, aETLDplus1, aProcessStartTime, + aRegisterTime, aUnregisterTime, std::move(aProgressLogger), + [&](mozilla::ProgressLogger aSubProgressLogger) { + aWriter.TakeAndSplice( + aThreadData.mSamplesDataWriter.TakeChunkedWriteFunc()); + return aThreadData.mProfiledThreadData.Info().ThreadId(); + }, + [&](mozilla::ProgressLogger aSubProgressLogger) { + aWriter.TakeAndSplice( + aThreadData.mMarkersDataWriter.TakeChunkedWriteFunc()); + }); +} + +void ProfiledThreadData::NotifyAboutToLoseJSContext( + JSContext* aContext, const mozilla::TimeStamp& aProcessStartTime, + ProfileBuffer& aBuffer) { + if (!mBufferPositionWhenReceivedJSContext) { + return; + } + + MOZ_RELEASE_ASSERT(aContext); + + if (mJITFrameInfoForPreviousJSContexts && + mJITFrameInfoForPreviousJSContexts->HasExpired( + aBuffer.BufferRangeStart())) { + mJITFrameInfoForPreviousJSContexts = nullptr; + } + + mozilla::UniquePtr jitFrameInfo = + mJITFrameInfoForPreviousJSContexts + ? 
std::move(mJITFrameInfoForPreviousJSContexts) + : mozilla::MakeUnique(); + + aBuffer.AddJITInfoForRange(*mBufferPositionWhenReceivedJSContext, + mThreadInfo.ThreadId(), aContext, *jitFrameInfo, + mozilla::ProgressLogger{}); + + mJITFrameInfoForPreviousJSContexts = std::move(jitFrameInfo); + mBufferPositionWhenReceivedJSContext = mozilla::Nothing(); +} + +ThreadStreamingContext::ThreadStreamingContext( + ProfiledThreadData& aProfiledThreadData, const ProfileBuffer& aBuffer, + JSContext* aCx, mozilla::FailureLatch& aFailureLatch, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) + : mProfiledThreadData(aProfiledThreadData), + mJSContext(aCx), + mSamplesDataWriter(aFailureLatch), + mMarkersDataWriter(aFailureLatch), + mUniqueStacks(mProfiledThreadData.PrepareUniqueStacks( + aBuffer, aCx, aFailureLatch, aService, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, "Preparing thread streaming context unique stacks...", + 99_pc, "Prepared thread streaming context Unique stacks"))) { + if (aFailureLatch.Failed()) { + return; + } + mSamplesDataWriter.SetUniqueStrings(mUniqueStacks->UniqueStrings()); + mSamplesDataWriter.StartBareList(); + mMarkersDataWriter.SetUniqueStrings(mUniqueStacks->UniqueStrings()); + mMarkersDataWriter.StartBareList(); +} + +void ThreadStreamingContext::FinalizeWriter() { + mSamplesDataWriter.EndBareList(); + mMarkersDataWriter.EndBareList(); +} + +ProcessStreamingContext::ProcessStreamingContext( + size_t aThreadCount, mozilla::FailureLatch& aFailureLatch, + const mozilla::TimeStamp& aProcessStartTime, double aSinceTime) + : mFailureLatch(aFailureLatch), + mProcessStartTime(aProcessStartTime), + mSinceTime(aSinceTime) { + if (mFailureLatch.Failed()) { + return; + } + if (!mTIDList.initCapacity(aThreadCount)) { + mFailureLatch.SetFailure( + "OOM in ProcessStreamingContext allocating TID list"); + return; + } + if (!mThreadStreamingContextList.initCapacity(aThreadCount)) { + mFailureLatch.SetFailure( + "OOM in 
ProcessStreamingContext allocating context list"); + mTIDList.clear(); + return; + } +} + +ProcessStreamingContext::~ProcessStreamingContext() { + if (mFailureLatch.Failed()) { + return; + } + MOZ_ASSERT(mTIDList.length() == mThreadStreamingContextList.length()); + MOZ_ASSERT(mTIDList.length() == mTIDList.capacity(), + "Didn't pre-allocate exactly right"); +} + +void ProcessStreamingContext::AddThreadStreamingContext( + ProfiledThreadData& aProfiledThreadData, const ProfileBuffer& aBuffer, + JSContext* aCx, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + if (mFailureLatch.Failed()) { + return; + } + MOZ_ASSERT(mTIDList.length() == mThreadStreamingContextList.length()); + MOZ_ASSERT(mTIDList.length() < mTIDList.capacity(), + "Didn't pre-allocate enough"); + mTIDList.infallibleAppend(aProfiledThreadData.Info().ThreadId()); + mThreadStreamingContextList.infallibleEmplaceBack( + aProfiledThreadData, aBuffer, aCx, mFailureLatch, aService, + aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "Prepared streaming thread id", 100_pc, + "Added thread streaming context")); +} diff --git a/tools/profiler/core/ProfiledThreadData.h b/tools/profiler/core/ProfiledThreadData.h new file mode 100644 index 0000000000..47ae0c579c --- /dev/null +++ b/tools/profiler/core/ProfiledThreadData.h @@ -0,0 +1,250 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfiledThreadData_h +#define ProfiledThreadData_h + +#include "platform.h" +#include "ProfileBuffer.h" +#include "ProfileBufferEntry.h" + +#include "mozilla/FailureLatch.h" +#include "mozilla/Maybe.h" +#include "mozilla/NotNull.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProfilerThreadRegistrationInfo.h" +#include "mozilla/RefPtr.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "nsStringFwd.h" + +class nsIEventTarget; +class ProfilerCodeAddressService; +struct JSContext; +struct ThreadStreamingContext; + +// This class contains information about a thread that is only relevant while +// the profiler is running, for any threads (both alive and dead) whose thread +// name matches the "thread filter" in the current profiler run. +// ProfiledThreadData objects may be kept alive even after the thread is +// unregistered, as long as there is still data for that thread in the profiler +// buffer. +// +// Accesses to this class are protected by the profiler state lock. +// +// Created as soon as the following are true for the thread: +// - The profiler is running, and +// - the thread matches the profiler's thread filter, and +// - the thread is registered with the profiler. +// So it gets created in response to either (1) the profiler being started (for +// an existing registered thread) or (2) the thread being registered (if the +// profiler is already running). +// +// The thread may be unregistered during the lifetime of ProfiledThreadData. +// If that happens, NotifyUnregistered() is called. +// +// This class is the right place to store buffer positions. Profiler buffer +// positions become invalid if the profiler buffer is destroyed, which happens +// when the profiler is stopped. 
+class ProfiledThreadData final { + public: + explicit ProfiledThreadData( + const mozilla::profiler::ThreadRegistrationInfo& aThreadInfo); + explicit ProfiledThreadData( + mozilla::profiler::ThreadRegistrationInfo&& aThreadInfo); + ~ProfiledThreadData(); + + void NotifyUnregistered(uint64_t aBufferPosition) { + mLastSample = mozilla::Nothing(); + MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext, + "JSContext should have been cleared before the thread was " + "unregistered"); + mUnregisterTime = mozilla::TimeStamp::Now(); + mBufferPositionWhenUnregistered = mozilla::Some(aBufferPosition); + mPreviousThreadRunningTimes.Clear(); + } + mozilla::Maybe BufferPositionWhenUnregistered() { + return mBufferPositionWhenUnregistered; + } + + mozilla::Maybe& LastSample() { return mLastSample; } + + mozilla::NotNull> PrepareUniqueStacks( + const ProfileBuffer& aBuffer, JSContext* aCx, + mozilla::FailureLatch& aFailureLatch, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + void StreamJSON(const ProfileBuffer& aBuffer, JSContext* aCx, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + void StreamJSON(ThreadStreamingContext&& aThreadStreamingContext, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + const mozilla::profiler::ThreadRegistrationInfo& Info() const { + return mThreadInfo; + } + + void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) { + mBufferPositionWhenReceivedJSContext = + mozilla::Some(aCurrentBufferPosition); + } + + // Call this method when the JS entries inside the buffer are about to + // become invalid, i.e., just before JS shutdown. 
+ void NotifyAboutToLoseJSContext(JSContext* aCx, + const mozilla::TimeStamp& aProcessStartTime, + ProfileBuffer& aBuffer); + + RunningTimes& PreviousThreadRunningTimesRef() { + return mPreviousThreadRunningTimes; + } + + private: + // Group A: + // The following fields are interesting for the entire lifetime of a + // ProfiledThreadData object. + + // This thread's thread info. Local copy because the one in ThreadRegistration + // may be destroyed while ProfiledThreadData stays alive. + const mozilla::profiler::ThreadRegistrationInfo mThreadInfo; + + // Contains JSON for JIT frames from any JSContexts that were used for this + // thread in the past. + // Null if this thread has never lost a JSContext or if all samples from + // previous JSContexts have been evicted from the profiler buffer. + mozilla::UniquePtr mJITFrameInfoForPreviousJSContexts; + + // Group B: + // The following fields are only used while this thread is alive and + // registered. They become Nothing() or empty once the thread is unregistered. + + // When sampling, this holds the position in ActivePS::mBuffer of the most + // recent sample for this thread, or Nothing() if there is no sample for this + // thread in the buffer. + mozilla::Maybe mLastSample; + + // Only non-Nothing() if the thread currently has a JSContext. + mozilla::Maybe mBufferPositionWhenReceivedJSContext; + + // RunningTimes at the previous sample if any, or empty. + RunningTimes mPreviousThreadRunningTimes; + + // Group C: + // The following fields are only used once this thread has been unregistered. + + mozilla::Maybe mBufferPositionWhenUnregistered; + mozilla::TimeStamp mUnregisterTime; +}; + +// This class will be used when outputting the profile data for one thread. 
+struct ThreadStreamingContext { + ProfiledThreadData& mProfiledThreadData; + JSContext* mJSContext; + SpliceableChunkedJSONWriter mSamplesDataWriter; + SpliceableChunkedJSONWriter mMarkersDataWriter; + mozilla::NotNull> mUniqueStacks; + + // These are updated when writing samples, and reused for "same-sample"s. + enum PreviousStackState { eNoStackYet, eStackWasNotEmpty, eStackWasEmpty }; + PreviousStackState mPreviousStackState = eNoStackYet; + uint32_t mPreviousStack = 0; + + ThreadStreamingContext(ProfiledThreadData& aProfiledThreadData, + const ProfileBuffer& aBuffer, JSContext* aCx, + mozilla::FailureLatch& aFailureLatch, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + void FinalizeWriter(); +}; + +// This class will be used when outputting the profile data for all threads. +class ProcessStreamingContext final : public mozilla::FailureLatch { + public: + // Pre-allocate space for `aThreadCount` threads. + ProcessStreamingContext(size_t aThreadCount, + mozilla::FailureLatch& aFailureLatch, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime); + + ~ProcessStreamingContext(); + + // Add the streaming context corresponding to each profiled thread. This + // should be called exactly the number of times specified in the constructor. + void AddThreadStreamingContext(ProfiledThreadData& aProfiledThreadData, + const ProfileBuffer& aBuffer, JSContext* aCx, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + // Retrieve the ThreadStreamingContext for a given thread id. + // Returns null if that thread id doesn't correspond to any profiled thread. 
+ ThreadStreamingContext* GetThreadStreamingContext( + const ProfilerThreadId& aThreadId) { + for (size_t i = 0; i < mTIDList.length(); ++i) { + if (mTIDList[i] == aThreadId) { + return &mThreadStreamingContextList[i]; + } + } + return nullptr; + } + + const mozilla::TimeStamp& ProcessStartTime() const { + return mProcessStartTime; + } + + double GetSinceTime() const { return mSinceTime; } + + ThreadStreamingContext* begin() { + return mThreadStreamingContextList.begin(); + }; + ThreadStreamingContext* end() { return mThreadStreamingContextList.end(); }; + + FAILURELATCH_IMPL_PROXY(mFailureLatch) + + private: + // Separate list of thread ids: it's much faster to do a linear search + // here than in a vector of bigger items like mThreadStreamingContextList. + mozilla::Vector mTIDList; + // Contexts corresponding to the thread id at the same indexes. + mozilla::Vector mThreadStreamingContextList; + + mozilla::FailureLatch& mFailureLatch; + + const mozilla::TimeStamp mProcessStartTime; + + const double mSinceTime; +}; + +// Stream all samples and markers from aBuffer with the given aThreadId (or an +// unspecified id for everything, assumed to be a single backtrace sample). +// Returns aThreadId if specified, else the streamed sample's id (if any). 
+ProfilerThreadId StreamSamplesAndMarkers( + const char* aName, ProfilerThreadId aThreadId, const ProfileBuffer& aBuffer, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, double aSinceTime, + UniqueStacks& aUniqueStacks, mozilla::ProgressLogger aProgressLogger); +void StreamSamplesAndMarkers(const char* aName, + ThreadStreamingContext& aThreadData, + SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, + mozilla::ProgressLogger aProgressLogger); + +#endif // ProfiledThreadData_h diff --git a/tools/profiler/core/ProfilerBacktrace.cpp b/tools/profiler/core/ProfilerBacktrace.cpp new file mode 100644 index 0000000000..a264d85d64 --- /dev/null +++ b/tools/profiler/core/ProfilerBacktrace.cpp @@ -0,0 +1,101 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfilerBacktrace.h" + +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" + +#include "mozilla/ProfileJSONWriter.h" + +ProfilerBacktrace::ProfilerBacktrace( + const char* aName, + mozilla::UniquePtr + aProfileChunkedBufferStorage, + mozilla::UniquePtr + aProfileBufferStorageOrNull /* = nullptr */) + : mName(aName), + mOptionalProfileChunkedBufferStorage( + std::move(aProfileChunkedBufferStorage)), + mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()), + mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)), + mProfileBuffer(mOptionalProfileBufferStorage.get()) { + MOZ_COUNT_CTOR(ProfilerBacktrace); + if (mProfileBuffer) { + MOZ_RELEASE_ASSERT(mProfileChunkedBuffer, + "If we take ownership of a ProfileBuffer, we must also " + "receive ownership of a ProfileChunkedBuffer"); + MOZ_RELEASE_ASSERT( + mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(), + "If we take ownership of a ProfileBuffer, we must also receive " + "ownership of its ProfileChunkedBuffer"); + } + MOZ_ASSERT( + !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer"); +} + +ProfilerBacktrace::ProfilerBacktrace( + const char* aName, + mozilla::ProfileChunkedBuffer* aExternalProfileChunkedBuffer, + ProfileBuffer* aExternalProfileBuffer) + : mName(aName), + mProfileChunkedBuffer(aExternalProfileChunkedBuffer), + mProfileBuffer(aExternalProfileBuffer) { + MOZ_COUNT_CTOR(ProfilerBacktrace); + if (!mProfileChunkedBuffer) { + if (mProfileBuffer) { + // We don't have a ProfileChunkedBuffer but we have a ProfileBuffer, use + // the latter's ProfileChunkedBuffer. 
+ mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer(); + MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe " + "ProfileChunkedBuffer"); + } + } else { + if (mProfileBuffer) { + MOZ_RELEASE_ASSERT( + mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(), + "If we reference both ProfileChunkedBuffer and ProfileBuffer, they " + "must already be connected"); + } + MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe " + "ProfileChunkedBuffer"); + } +} + +ProfilerBacktrace::~ProfilerBacktrace() { MOZ_COUNT_DTOR(ProfilerBacktrace); } + +ProfilerThreadId ProfilerBacktrace::StreamJSON( + SpliceableJSONWriter& aWriter, const mozilla::TimeStamp& aProcessStartTime, + UniqueStacks& aUniqueStacks) { + ProfilerThreadId processedThreadId; + + // Unlike ProfiledThreadData::StreamJSON, we don't need to call + // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain + // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded + // at sample time. + if (mProfileBuffer) { + processedThreadId = StreamSamplesAndMarkers( + mName.c_str(), ProfilerThreadId{}, *mProfileBuffer, aWriter, ""_ns, + ""_ns, aProcessStartTime, + /* aRegisterTime */ mozilla::TimeStamp(), + /* aUnregisterTime */ mozilla::TimeStamp(), + /* aSinceTime */ 0, aUniqueStacks, mozilla::ProgressLogger{}); + } else if (mProfileChunkedBuffer) { + ProfileBuffer profileBuffer(*mProfileChunkedBuffer); + processedThreadId = StreamSamplesAndMarkers( + mName.c_str(), ProfilerThreadId{}, profileBuffer, aWriter, ""_ns, ""_ns, + aProcessStartTime, + /* aRegisterTime */ mozilla::TimeStamp(), + /* aUnregisterTime */ mozilla::TimeStamp(), + /* aSinceTime */ 0, aUniqueStacks, mozilla::ProgressLogger{}); + } + // If there are no buffers, the backtrace is empty and nothing is streamed. 
+ + return processedThreadId; +} diff --git a/tools/profiler/core/ProfilerBacktrace.h b/tools/profiler/core/ProfilerBacktrace.h new file mode 100644 index 0000000000..55811f4422 --- /dev/null +++ b/tools/profiler/core/ProfilerBacktrace.h @@ -0,0 +1,184 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __PROFILER_BACKTRACE_H +#define __PROFILER_BACKTRACE_H + +#include "ProfileBuffer.h" + +#include "mozilla/ProfileBufferEntrySerialization.h" +#include "mozilla/UniquePtrExtensions.h" + +#include + +class ProfileBuffer; +class ProfilerCodeAddressService; +class ThreadInfo; +class UniqueStacks; + +namespace mozilla { +class ProfileChunkedBuffer; +class TimeStamp; +namespace baseprofiler { +class SpliceableJSONWriter; +} // namespace baseprofiler +} // namespace mozilla + +// ProfilerBacktrace encapsulates a synchronous sample. +// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they +// must already be linked together). The ProfileChunkedBuffer contains all the +// data; the ProfileBuffer is not strictly needed, only provide it if it is +// already available at the call site. +// And these buffers can either be: +// - owned here, so that the ProfilerBacktrace object can be kept for later +// use), OR +// - referenced through pointers (in cases where the backtrace is immediately +// streamed out, so we only need temporary references to external buffers); +// these pointers may be null for empty backtraces. +class ProfilerBacktrace { + public: + // Take ownership of external buffers and use them to keep, and to stream a + // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must + // be provided as well. 
+ explicit ProfilerBacktrace( + const char* aName, + mozilla::UniquePtr + aProfileChunkedBufferStorage, + mozilla::UniquePtr aProfileBufferStorageOrNull = nullptr); + + // Take pointers to external buffers and use them to stream a backtrace. + // If null, the backtrace is effectively empty. + // If both are provided, they must already be connected. + explicit ProfilerBacktrace( + const char* aName, + mozilla::ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull = + nullptr, + ProfileBuffer* aExternalProfileBufferOrNull = nullptr); + + ~ProfilerBacktrace(); + + [[nodiscard]] bool IsEmpty() const { + return !mProfileChunkedBuffer || + mozilla::ProfileBufferEntryWriter::Serializer< + mozilla::ProfileChunkedBuffer>::Bytes(*mProfileChunkedBuffer) <= + mozilla::ULEB128Size(0u); + } + + // ProfilerBacktraces' stacks are deduplicated in the context of the + // profile that contains the backtrace as a marker payload. + // + // That is, markers that contain backtraces should not need their own stack, + // frame, and string tables. They should instead reuse their parent + // profile's tables. + ProfilerThreadId StreamJSON( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, UniqueStacks& aUniqueStacks); + + private: + // Used to serialize a ProfilerBacktrace. + friend struct mozilla::ProfileBufferEntryWriter::Serializer< + ProfilerBacktrace>; + friend struct mozilla::ProfileBufferEntryReader::Deserializer< + ProfilerBacktrace>; + + std::string mName; + + // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be + // located before `mProfileBuffer` so that it's destroyed after. + mozilla::UniquePtr + mOptionalProfileChunkedBufferStorage; + // If null, there is no need to check mProfileBuffer's (if present) underlying + // buffer because this is done when constructed. 
+ mozilla::ProfileChunkedBuffer* mProfileChunkedBuffer; + + mozilla::UniquePtr mOptionalProfileBufferStorage; + ProfileBuffer* mProfileBuffer; +}; + +namespace mozilla { + +// Format: [ UniquePtr | name ] +// Initial len==0 marks a nullptr or empty backtrace. +template <> +struct mozilla::ProfileBufferEntryWriter::Serializer { + static Length Bytes(const ProfilerBacktrace& aBacktrace) { + if (!aBacktrace.mProfileChunkedBuffer) { + // No buffer. + return ULEB128Size(0u); + } + auto bufferBytes = SumBytes(*aBacktrace.mProfileChunkedBuffer); + if (bufferBytes <= ULEB128Size(0u)) { + // Empty buffer. + return ULEB128Size(0u); + } + return bufferBytes + SumBytes(aBacktrace.mName); + } + + static void Write(mozilla::ProfileBufferEntryWriter& aEW, + const ProfilerBacktrace& aBacktrace) { + if (!aBacktrace.mProfileChunkedBuffer || + SumBytes(*aBacktrace.mProfileChunkedBuffer) <= ULEB128Size(0u)) { + // No buffer, or empty buffer. + aEW.WriteULEB128(0u); + return; + } + aEW.WriteObject(*aBacktrace.mProfileChunkedBuffer); + aEW.WriteObject(aBacktrace.mName); + } +}; + +template +struct mozilla::ProfileBufferEntryWriter::Serializer< + mozilla::UniquePtr> { + static Length Bytes( + const mozilla::UniquePtr& aBacktrace) { + if (!aBacktrace) { + // Null backtrace pointer (treated like an empty backtrace). + return ULEB128Size(0u); + } + return SumBytes(*aBacktrace); + } + + static void Write( + mozilla::ProfileBufferEntryWriter& aEW, + const mozilla::UniquePtr& aBacktrace) { + if (!aBacktrace) { + // Null backtrace pointer (treated like an empty backtrace). 
+ aEW.WriteULEB128(0u); + return; + } + aEW.WriteObject(*aBacktrace); + } +}; + +template +struct mozilla::ProfileBufferEntryReader::Deserializer< + mozilla::UniquePtr> { + static void ReadInto( + mozilla::ProfileBufferEntryReader& aER, + mozilla::UniquePtr& aBacktrace) { + aBacktrace = Read(aER); + } + + static mozilla::UniquePtr Read( + mozilla::ProfileBufferEntryReader& aER) { + auto profileChunkedBuffer = + aER.ReadObject>(); + if (!profileChunkedBuffer) { + return nullptr; + } + MOZ_ASSERT( + !profileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers"); + std::string name = aER.ReadObject(); + return UniquePtr{ + new ProfilerBacktrace(name.c_str(), std::move(profileChunkedBuffer))}; + } +}; + +} // namespace mozilla + +#endif // __PROFILER_BACKTRACE_H diff --git a/tools/profiler/core/ProfilerBindings.cpp b/tools/profiler/core/ProfilerBindings.cpp new file mode 100644 index 0000000000..c3af5c5b56 --- /dev/null +++ b/tools/profiler/core/ProfilerBindings.cpp @@ -0,0 +1,386 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* FFI functions for Profiler Rust API to call into profiler */ + +#include "ProfilerBindings.h" + +#include "GeckoProfiler.h" + +#include +#include + +void gecko_profiler_register_thread(const char* aName) { + PROFILER_REGISTER_THREAD(aName); +} + +void gecko_profiler_unregister_thread() { PROFILER_UNREGISTER_THREAD(); } + +void gecko_profiler_construct_label(mozilla::AutoProfilerLabel* aAutoLabel, + JS::ProfilingCategoryPair aCategoryPair) { +#ifdef MOZ_GECKO_PROFILER + new (aAutoLabel) mozilla::AutoProfilerLabel( + "", nullptr, aCategoryPair, + uint32_t( + js::ProfilingStackFrame::Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)); +#endif +} + +void gecko_profiler_destruct_label(mozilla::AutoProfilerLabel* aAutoLabel) { +#ifdef MOZ_GECKO_PROFILER + aAutoLabel->~AutoProfilerLabel(); +#endif +} + +void gecko_profiler_construct_timestamp_now(mozilla::TimeStamp* aTimeStamp) { + new (aTimeStamp) mozilla::TimeStamp(mozilla::TimeStamp::Now()); +} + +void gecko_profiler_clone_timestamp(const mozilla::TimeStamp* aSrcTimeStamp, + mozilla::TimeStamp* aDestTimeStamp) { + new (aDestTimeStamp) mozilla::TimeStamp(*aSrcTimeStamp); +} + +void gecko_profiler_destruct_timestamp(mozilla::TimeStamp* aTimeStamp) { + aTimeStamp->~TimeStamp(); +} + +void gecko_profiler_add_timestamp(const mozilla::TimeStamp* aTimeStamp, + mozilla::TimeStamp* aDestTimeStamp, + double aMicroseconds) { + new (aDestTimeStamp) mozilla::TimeStamp( + *aTimeStamp + mozilla::TimeDuration::FromMicroseconds(aMicroseconds)); +} + +void gecko_profiler_subtract_timestamp(const mozilla::TimeStamp* aTimeStamp, + mozilla::TimeStamp* aDestTimeStamp, + double aMicroseconds) { + new (aDestTimeStamp) mozilla::TimeStamp( + *aTimeStamp - mozilla::TimeDuration::FromMicroseconds(aMicroseconds)); +} + +void gecko_profiler_construct_marker_timing_instant_at( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v); + 
mozilla::MarkerTiming::UnsafeConstruct(aMarkerTiming, *aTime, + mozilla::TimeStamp{}, + mozilla::MarkerTiming::Phase::Instant); +#endif +} + +void gecko_profiler_construct_marker_timing_instant_now( + mozilla::MarkerTiming* aMarkerTiming) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, mozilla::TimeStamp::Now(), mozilla::TimeStamp{}, + mozilla::MarkerTiming::Phase::Instant); +#endif +} + +void gecko_profiler_construct_marker_timing_interval( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime, + const mozilla::TimeStamp* aEndTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, *aStartTime, *aEndTime, + mozilla::MarkerTiming::Phase::Interval); +#endif +} + +void gecko_profiler_construct_marker_timing_interval_until_now_from( + mozilla::MarkerTiming* aMarkerTiming, + const mozilla::TimeStamp* aStartTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, *aStartTime, mozilla::TimeStamp::Now(), + mozilla::MarkerTiming::Phase::Interval); +#endif +} + +void gecko_profiler_construct_marker_timing_interval_start( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, *aTime, mozilla::TimeStamp{}, + mozilla::MarkerTiming::Phase::IntervalStart); +#endif +} + +void gecko_profiler_construct_marker_timing_interval_end( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, mozilla::TimeStamp{}, *aTime, + mozilla::MarkerTiming::Phase::IntervalEnd); +#endif +} + +void 
gecko_profiler_destruct_marker_timing( + mozilla::MarkerTiming* aMarkerTiming) { +#ifdef MOZ_GECKO_PROFILER + aMarkerTiming->~MarkerTiming(); +#endif +} + +void gecko_profiler_construct_marker_schema( + mozilla::MarkerSchema* aMarkerSchema, + const mozilla::MarkerSchema::Location* aLocations, size_t aLength) { +#ifdef MOZ_GECKO_PROFILER + new (aMarkerSchema) mozilla::MarkerSchema(aLocations, aLength); +#endif +} + +void gecko_profiler_construct_marker_schema_with_special_front_end_location( + mozilla::MarkerSchema* aMarkerSchema) { +#ifdef MOZ_GECKO_PROFILER + new (aMarkerSchema) + mozilla::MarkerSchema(mozilla::MarkerSchema::SpecialFrontendLocation{}); +#endif +} + +void gecko_profiler_destruct_marker_schema( + mozilla::MarkerSchema* aMarkerSchema) { +#ifdef MOZ_GECKO_PROFILER + aMarkerSchema->~MarkerSchema(); +#endif +} + +void gecko_profiler_marker_schema_set_chart_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetChartLabel(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_set_tooltip_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetTooltipLabel(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_set_table_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetTableLabel(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_set_all_labels(mozilla::MarkerSchema* aSchema, + const char* aLabel, + size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetAllLabels(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_add_key_format( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + mozilla::MarkerSchema::Format aFormat) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyFormat(std::string(aKey, 
aKeyLength), aFormat); +#endif +} + +void gecko_profiler_marker_schema_add_key_label_format( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + const char* aLabel, size_t aLabelLength, + mozilla::MarkerSchema::Format aFormat) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyLabelFormat(std::string(aKey, aKeyLength), + std::string(aLabel, aLabelLength), aFormat); +#endif +} + +void gecko_profiler_marker_schema_add_key_format_searchable( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + mozilla::MarkerSchema::Format aFormat, + mozilla::MarkerSchema::Searchable aSearchable) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyFormatSearchable(std::string(aKey, aKeyLength), aFormat, + aSearchable); +#endif +} + +void gecko_profiler_marker_schema_add_key_label_format_searchable( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + const char* aLabel, size_t aLabelLength, + mozilla::MarkerSchema::Format aFormat, + mozilla::MarkerSchema::Searchable aSearchable) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyLabelFormatSearchable(std::string(aKey, aKeyLength), + std::string(aLabel, aLabelLength), + aFormat, aSearchable); +#endif +} + +void gecko_profiler_marker_schema_add_static_label_value( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength, + const char* aValue, size_t aValueLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddStaticLabelValue(std::string(aLabel, aLabelLength), + std::string(aValue, aValueLength)); +#endif +} + +void gecko_profiler_marker_schema_stream( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, mozilla::MarkerSchema* aMarkerSchema, + void* aStreamedNamesSet) { +#ifdef MOZ_GECKO_PROFILER + auto* streamedNames = static_cast*>(aStreamedNamesSet); + // std::set.insert(T&&) returns a pair, its `second` is true if the element + // was actually inserted (i.e., it was not there yet.). 
+ const bool didInsert = + streamedNames->insert(std::string(aName, aNameLength)).second; + if (didInsert) { + std::move(*aMarkerSchema) + .Stream(*aWriter, mozilla::Span(aName, aNameLength)); + } +#endif +} + +void gecko_profiler_json_writer_int_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, int64_t aValue) { +#ifdef MOZ_GECKO_PROFILER + aWriter->IntProperty(mozilla::Span(aName, aNameLength), aValue); +#endif +} + +void gecko_profiler_json_writer_float_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, double aValue) { +#ifdef MOZ_GECKO_PROFILER + aWriter->DoubleProperty(mozilla::Span(aName, aNameLength), aValue); +#endif +} + +void gecko_profiler_json_writer_bool_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, bool aValue) { +#ifdef MOZ_GECKO_PROFILER + aWriter->BoolProperty(mozilla::Span(aName, aNameLength), aValue); +#endif +} +void gecko_profiler_json_writer_string_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, const char* aValue, size_t aValueLength) { +#ifdef MOZ_GECKO_PROFILER + aWriter->StringProperty(mozilla::Span(aName, aNameLength), + mozilla::Span(aValue, aValueLength)); +#endif +} + +void gecko_profiler_json_writer_null_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength) { +#ifdef MOZ_GECKO_PROFILER + aWriter->NullProperty(mozilla::Span(aName, aNameLength)); +#endif +} + +void gecko_profiler_add_marker_untyped( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions) { +#ifdef MOZ_GECKO_PROFILER + profiler_add_marker( + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + 
mozilla::MarkerOptions( + std::move(*aMarkerTiming), + mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions))); +#endif +} + +void gecko_profiler_add_marker_text( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions, const char* aText, + size_t aTextLength) { +#ifdef MOZ_GECKO_PROFILER + profiler_add_marker( + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::MarkerOptions( + std::move(*aMarkerTiming), + mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions)), + geckoprofiler::markers::TextMarker{}, + mozilla::ProfilerString8View(aText, aTextLength)); +#endif +} + +void gecko_profiler_add_marker( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions, uint8_t aMarkerTag, + const uint8_t* aPayload, size_t aPayloadSize) { +#ifdef MOZ_GECKO_PROFILER + // Copy the marker timing and create the marker option. + mozilla::MarkerOptions markerOptions( + std::move(*aMarkerTiming), + mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions)); + + // Currently it's not possible to add a threadId option, but we will + // have it soon. + if (markerOptions.ThreadId().IsUnspecified()) { + // If yet unspecified, set thread to this thread where the marker is added. + markerOptions.Set(mozilla::MarkerThreadId::CurrentThread()); + } + + auto& buffer = profiler_get_core_buffer(); + mozilla::Span payload(aPayload, aPayloadSize); + + mozilla::StackCaptureOptions captureOptions = + markerOptions.Stack().CaptureOptions(); + if (captureOptions != mozilla::StackCaptureOptions::NoStack && + // Do not capture a stack if the NoMarkerStacks feature is set. 
+ profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) { + // A capture was requested, let's attempt to do it here&now. This avoids a + // lot of allocations that would be necessary if capturing a backtrace + // separately. + // TODO use a local on-stack byte buffer to remove last allocation. + // TODO reduce internal profiler stack levels, see bug 1659872. + mozilla::ProfileBufferChunkManagerSingle chunkManager( + mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize); + mozilla::ProfileChunkedBuffer chunkedBuffer( + mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + chunkManager); + markerOptions.StackRef().UseRequestedBacktrace( + profiler_capture_backtrace_into(chunkedBuffer, captureOptions) + ? &chunkedBuffer + : nullptr); + + // This call must be made from here, while chunkedBuffer is in scope. + buffer.PutObjects( + mozilla::ProfileBufferEntryKind::Marker, markerOptions, + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::base_profiler_markers_detail::Streaming::DeserializerTag( + aMarkerTag), + mozilla::MarkerPayloadType::Rust, payload); + return; + } + + buffer.PutObjects( + mozilla::ProfileBufferEntryKind::Marker, markerOptions, + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::base_profiler_markers_detail::Streaming::DeserializerTag( + aMarkerTag), + mozilla::MarkerPayloadType::Rust, payload); +#endif +} diff --git a/tools/profiler/core/ProfilerCodeAddressService.cpp b/tools/profiler/core/ProfilerCodeAddressService.cpp new file mode 100644 index 0000000000..5a65e06379 --- /dev/null +++ b/tools/profiler/core/ProfilerCodeAddressService.cpp @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfilerCodeAddressService.h" + +#include "platform.h" +#include "mozilla/StackWalk.h" + +using namespace mozilla; + +#if defined(XP_LINUX) || defined(XP_FREEBSD) +static char* SearchSymbolTable(SymbolTable& aTable, uint32_t aOffset) { + size_t index; + bool exact = + BinarySearch(aTable.mAddrs, 0, aTable.mAddrs.Length(), aOffset, &index); + + if (index == 0 && !exact) { + // Our offset is before the first symbol in the table; no result. + return nullptr; + } + + // Extract the (mangled) symbol name out of the string table. + auto strings = reinterpret_cast(aTable.mBuffer.Elements()); + nsCString symbol; + symbol.Append(strings + aTable.mIndex[index - 1], + aTable.mIndex[index] - aTable.mIndex[index - 1]); + + // First try demangling as a Rust identifier. + char demangled[1024]; + if (!profiler_demangle_rust(symbol.get(), demangled, + ArrayLength(demangled))) { + // Then as a C++ identifier. + DemangleSymbol(symbol.get(), demangled, ArrayLength(demangled)); + } + demangled[ArrayLength(demangled) - 1] = '\0'; + + // Use the mangled name if we didn't successfully demangle. + return strdup(demangled[0] != '\0' ? demangled : symbol.get()); +} +#endif + +bool ProfilerCodeAddressService::GetFunction(const void* aPc, + nsACString& aResult) { + Entry& entry = GetEntry(aPc); + +#if defined(XP_LINUX) || defined(XP_FREEBSD) + // On Linux, most symbols will not be found by the MozDescribeCodeAddress call + // that GetEntry does. So we read the symbol table directly from the ELF + // image. + + // SymbolTable currently assumes library offsets will not be larger than + // 4 GiB. 
+ if (entry.mLOffset <= 0xFFFFFFFF && !entry.mFunction) { + auto p = mSymbolTables.lookupForAdd(entry.mLibrary); + if (!p) { + if (!mSymbolTables.add(p, entry.mLibrary, SymbolTable())) { + MOZ_CRASH("ProfilerCodeAddressService OOM"); + } + profiler_get_symbol_table(entry.mLibrary, nullptr, &p->value()); + } + entry.mFunction = + SearchSymbolTable(p->value(), static_cast(entry.mLOffset)); + } +#endif + + if (!entry.mFunction || entry.mFunction[0] == '\0') { + return false; + } + + aResult = nsDependentCString(entry.mFunction); + return true; +} diff --git a/tools/profiler/core/ProfilerMarkers.cpp b/tools/profiler/core/ProfilerMarkers.cpp new file mode 100644 index 0000000000..7c299678d1 --- /dev/null +++ b/tools/profiler/core/ProfilerMarkers.cpp @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ProfilerMarkers.h" + +template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&, + const mozilla::MarkerCategory&, mozilla::MarkerOptions&&, + mozilla::baseprofiler::markers::NoPayload); + +template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&, + const mozilla::MarkerCategory&, mozilla::MarkerOptions&&, + mozilla::baseprofiler::markers::TextMarker, const std::string&); + +template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker, + const std::string&); + +template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker, + const nsCString&); + +template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::Tracing, + const mozilla::ProfilerString8View&); diff --git a/tools/profiler/core/ProfilerThreadRegistration.cpp b/tools/profiler/core/ProfilerThreadRegistration.cpp new file mode 100644 index 0000000000..c81d00573d --- /dev/null +++ b/tools/profiler/core/ProfilerThreadRegistration.cpp @@ -0,0 +1,198 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#include "mozilla/ProfilerThreadRegistration.h"
+
+#include "mozilla/ProfilerMarkers.h"
+#include "mozilla/ProfilerThreadRegistry.h"
+#include "nsString.h"
+#ifdef MOZ_GECKO_PROFILER
+#  include "platform.h"
+#else
+// Without the Gecko Profiler, the awake/asleep notifications used below
+// expand to nothing.
+#  define profiler_mark_thread_awake()
+#  define profiler_mark_thread_asleep()
+#endif
+
+namespace mozilla::profiler {
+
+/* static */
+MOZ_THREAD_LOCAL(ThreadRegistration*) ThreadRegistration::tlsThreadRegistration;
+
+// Registers the current thread: stores `this` in the TLS and adds it to the
+// ThreadRegistry. If the TLS already holds a registration, this object is a
+// nested one: only a marker is recorded and nothing is registered.
+ThreadRegistration::ThreadRegistration(const char* aName, const void* aStackTop)
+    : mData(aName, aStackTop) {
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return;
+  }
+
+  if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) {
+    // This is a nested ThreadRegistration object, so the thread is already
+    // registered in the TLS and ThreadRegistry and we don't need to register
+    // again.
+    MOZ_ASSERT(
+        mData.Info().ThreadId() == rootRegistration->mData.Info().ThreadId(),
+        "Thread being re-registered has changed its TID");
+    // TODO: Use new name. This is currently not possible because the
+    // TLS-stored RegisteredThread's ThreadInfo cannot be changed.
+    // In the meantime, we record a marker that could be used in the frontend.
+    PROFILER_MARKER_TEXT("Nested ThreadRegistration()", OTHER_Profiling,
+                         MarkerOptions{},
+                         ProfilerString8View::WrapNullTerminatedString(aName));
+    return;
+  }
+
+  tls->set(this);
+  ThreadRegistry::Register(OnThreadRef{*this});
+  profiler_mark_thread_awake();
+}
+
+// Only the root registration (the one stored in the TLS) unregisters from the
+// ThreadRegistry and clears the TLS; a nested one just records a marker. If
+// the TLS is already empty, a marker is sent to the main thread instead.
+ThreadRegistration::~ThreadRegistration() {
+  MOZ_ASSERT(profiler_current_thread_id() == mData.mInfo.ThreadId(),
+             "ThreadRegistration must be destroyed on its thread");
+  MOZ_ASSERT(!mDataMutex.IsLockedOnCurrentThread(),
+             "Mutex shouldn't be locked here, as it's about to be destroyed "
+             "in ~ThreadRegistration()");
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return;
+  }
+
+  if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) {
+    if (rootRegistration != this) {
+      // `this` is not in the TLS, so it was a nested registration, there is
+      // nothing to unregister yet.
+      PROFILER_MARKER_TEXT(
+          "Nested ~ThreadRegistration()", OTHER_Profiling, MarkerOptions{},
+          ProfilerString8View::WrapNullTerminatedString(mData.Info().Name()));
+      return;
+    }
+
+    profiler_mark_thread_asleep();
+#ifdef NIGHTLY_BUILD
+    mData.RecordWakeCount();
+#endif
+    ThreadRegistry::Unregister(OnThreadRef{*this});
+#ifdef DEBUG
+    // After ThreadRegistry::Unregister, other threads should not be able to
+    // find this ThreadRegistration, and shouldn't have kept any reference to
+    // it across the ThreadRegistry mutex.
+    MOZ_ASSERT(mDataMutex.TryLock(),
+               "Mutex shouldn't be locked in any thread, as it's about to be "
+               "destroyed in ~ThreadRegistration()");
+    // Undo the above successful TryLock.
+    mDataMutex.Unlock();
+#endif  // DEBUG
+
+    tls->set(nullptr);
+    return;
+  }
+
+  // Already removed from the TLS!? This could happen with improperly-nested
+  // register/unregister calls, and the first ThreadRegistration has already
+  // been unregistered.
+  // We cannot record a marker on this thread because it was already
+  // unregistered. Send it to the main thread (unless this *is* already the
+  // main thread, which has been unregistered); this may be useful to catch
+  // mismatched register/unregister pairs in Firefox.
+  if (!profiler_is_main_thread()) {
+    nsAutoCString threadId("thread id: ");
+    threadId.AppendInt(profiler_current_thread_id().ToNumber());
+    threadId.AppendLiteral(", name: \"");
+    threadId.AppendASCII(mData.Info().Name());
+    threadId.AppendLiteral("\"");
+    PROFILER_MARKER_TEXT(
+        "~ThreadRegistration() but TLS is empty", OTHER_Profiling,
+        MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+        threadId);
+  }
+}
+
+// Heap-based registration. Returns the ProfilingStack of the thread's root
+// registration (creating that registration if there is none yet), or nullptr
+// when the TLS is unavailable. Nested calls only bump a depth counter.
+/* static */
+ProfilingStack* ThreadRegistration::RegisterThread(const char* aName,
+                                                   const void* aStackTop) {
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return nullptr;
+  }
+
+  if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) {
+    // Already registered, record the extra depth to ignore the matching
+    // UnregisterThread.
+    ++rootRegistration->mOtherRegistrations;
+    // TODO: Use new name. This is currently not possible because the
+    // TLS-stored RegisteredThread's ThreadInfo cannot be changed.
+    // In the meantime, we record a marker that could be used in the frontend.
+    PROFILER_MARKER_TEXT("Nested ThreadRegistration::RegisterThread()",
+                         OTHER_Profiling, MarkerOptions{},
+                         ProfilerString8View::WrapNullTerminatedString(aName));
+    return &rootRegistration->mData.mProfilingStack;
+  }
+
+  // Create on heap, it self-registers with the TLS (its effective owner, so
+  // we can forget the pointer after this), and with the Profiler.
+  ThreadRegistration* tr = new ThreadRegistration(aName, aStackTop);
+  tr->mIsOnHeap = true;
+  return &tr->mData.mProfilingStack;
+}
+
+// Counterpart of RegisterThread(): decreases the nesting depth, or deletes the
+// heap-allocated root registration when the depth is already zero. Excess or
+// unmatched calls are ignored, with a marker recorded where possible.
+/* static */
+void ThreadRegistration::UnregisterThread() {
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return;
+  }
+
+  if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) {
+    if (rootRegistration->mOtherRegistrations != 0) {
+      // This is assumed to be a matching UnregisterThread() for a nested
+      // RegisterThread(). Decrease depth and we're done.
+      --rootRegistration->mOtherRegistrations;
+      // We don't know what name was used in the related RegisterThread().
+      PROFILER_MARKER_UNTYPED("Nested ThreadRegistration::UnregisterThread()",
+                              OTHER_Profiling);
+      return;
+    }
+
+    if (!rootRegistration->mIsOnHeap) {
+      // The root registration was not added by `RegisterThread()`, so it
+      // shouldn't be deleted!
+      // This could happen if there are un-paired `UnregisterThread` calls when
+      // the initial registration (still alive) was done on the stack. We don't
+      // know what name was used in the related RegisterThread().
+      PROFILER_MARKER_UNTYPED("Excess ThreadRegistration::UnregisterThread()",
+                              OTHER_Profiling, MarkerStack::Capture());
+      return;
+    }
+
+    // This is the last `UnregisterThread()` that should match the first
+    // `RegisterThread()` that created this ThreadRegistration on the heap.
+    // Just delete this root registration, it will de-register itself from the
+    // TLS (and from the Profiler).
+    delete rootRegistration;
+    return;
+  }
+
+  // There is no known ThreadRegistration for this thread, ignore this
+  // request. We cannot record a marker on this thread because it was already
+  // unregistered. Send it to the main thread (unless this *is* already the
+  // main thread, which has been unregistered); this may be useful to catch
+  // mismatched register/unregister pairs in Firefox.
+  if (!profiler_is_main_thread()) {
+    nsAutoCString threadId("thread id: ");
+    threadId.AppendInt(profiler_current_thread_id().ToNumber());
+    PROFILER_MARKER_TEXT(
+        "ThreadRegistration::UnregisterThread() but TLS is empty",
+        OTHER_Profiling,
+        MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+        threadId);
+  }
+}
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/ProfilerThreadRegistrationData.cpp b/tools/profiler/core/ProfilerThreadRegistrationData.cpp
new file mode 100644
index 0000000000..e70f9e749a
--- /dev/null
+++ b/tools/profiler/core/ProfilerThreadRegistrationData.cpp
@@ -0,0 +1,303 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfilerThreadRegistrationData.h"
+
+#include "mozilla/FOGIPC.h"
+#include "mozilla/glean/GleanMetrics.h"
+#include "mozilla/ProfilerMarkers.h"
+#include "js/AllocationRecording.h"
+#include "js/ProfilingStack.h"
+
+// Platform headers for the stack-top lookups in the ThreadRegistrationData
+// constructor: NtCurrentTeb() on Windows, pthread_get_stackaddr_np() on Darwin.
+#if defined(XP_WIN)
+#  include <windows.h>
+#elif defined(XP_DARWIN)
+#  include <pthread.h>
+#endif
+
+#ifdef NIGHTLY_BUILD
+namespace geckoprofiler::markers {
+
+using namespace mozilla;
+
+// Marker type for the "Thread CPU use" marker recorded by RecordWakeCount().
+struct ThreadCpuUseMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("ThreadCpuUse");
+  }
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   ProfilerThreadId aThreadId,
+                                   int64_t aCpuTimeMs, int64_t aWakeUps,
+                                   const ProfilerString8View& aThreadName) {
+    aWriter.IntProperty("threadId", static_cast<int64_t>(aThreadId.ToNumber()));
+    aWriter.IntProperty("time", aCpuTimeMs);
+    aWriter.IntProperty("wakeups", aWakeUps);
+    aWriter.StringProperty("label", aThreadName);
+  }
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.AddKeyLabelFormat("time", "CPU Time", MS::Format::Milliseconds);
+    schema.AddKeyLabelFormat("wakeups", "Wake ups", MS::Format::Integer);
+    schema.SetTooltipLabel("{marker.name} - {marker.data.label}");
+    schema.SetTableLabel(
+        "{marker.name} - {marker.data.label}: {marker.data.time} of CPU time, "
+        "{marker.data.wakeups} wake ups");
+    return schema;
+  }
+};
+
+}  // namespace geckoprofiler::markers
+#endif
+
+namespace mozilla::profiler {
+
+// Captures the thread info and platform data for the calling thread. The
+// caller-provided aStackTop is only used on platforms where the real stack
+// base cannot be queried directly.
+ThreadRegistrationData::ThreadRegistrationData(const char* aName,
+                                               const void* aStackTop)
+    : mInfo(aName),
+      mPlatformData(mInfo.ThreadId()),
+      mStackTop(
+#if defined(XP_WIN)
+          // We don't have to guess on Windows.
+          reinterpret_cast<const void*>(
+              reinterpret_cast<PNT_TIB>(NtCurrentTeb())->StackBase)
+#elif defined(XP_DARWIN)
+          // We don't have to guess on Mac/Darwin.
+          reinterpret_cast<const void*>(
+              pthread_get_stackaddr_np(pthread_self()))
+#else
+          // Otherwise use the given guess.
+          aStackTop
+#endif
+      ) {
+}
+
+// This is a simplified version of profiler_add_marker that can be easily passed
+// into the JS engine.
+static void profiler_add_js_marker(const char* aMarkerName,
+                                   const char* aMarkerText) {
+  PROFILER_MARKER_TEXT(
+      mozilla::ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS,
+      {}, mozilla::ProfilerString8View::WrapNullTerminatedString(aMarkerText));
+}
+
+// Callback handed to JS::EnableRecordingAllocations(): records one
+// "JS allocation" marker (with a captured stack) per reported allocation, if
+// the current thread is being profiled for markers.
+static void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info) {
+  if (!profiler_thread_is_being_profiled_for_markers()) {
+    return;
+  }
+
+  struct JsAllocationMarker {
+    static constexpr mozilla::Span<const char> MarkerTypeName() {
+      return mozilla::MakeStringSpan("JS allocation");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
+        const mozilla::ProfilerString16View& aTypeName,
+        const mozilla::ProfilerString8View& aClassName,
+        const mozilla::ProfilerString16View& aDescriptiveTypeName,
+        const mozilla::ProfilerString8View& aCoarseType, uint64_t aSize,
+        bool aInNursery) {
+      // Empty names are omitted from the payload.
+      if (aClassName.Length() != 0) {
+        aWriter.StringProperty("className", aClassName);
+      }
+      if (aTypeName.Length() != 0) {
+        aWriter.StringProperty("typeName", NS_ConvertUTF16toUTF8(aTypeName));
+      }
+      if (aDescriptiveTypeName.Length() != 0) {
+        aWriter.StringProperty("descriptiveTypeName",
+                               NS_ConvertUTF16toUTF8(aDescriptiveTypeName));
+      }
+      aWriter.StringProperty("coarseType", aCoarseType);
+      aWriter.IntProperty("size", aSize);
+      aWriter.BoolProperty("inNursery", aInNursery);
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+
+  profiler_add_marker(
+      "JS allocation", geckoprofiler::category::JS,
+      mozilla::MarkerStack::Capture(), JsAllocationMarker{},
+      mozilla::ProfilerString16View::WrapNullTerminatedString(info.typeName),
+      mozilla::ProfilerString8View::WrapNullTerminatedString(info.className),
+      mozilla::ProfilerString16View::WrapNullTerminatedString(
+          info.descriptiveTypeName),
+      mozilla::ProfilerString8View::WrapNullTerminatedString(info.coarseType),
+      info.size, info.inNursery);
+}
+
+// Marks the thread as profiled with the given features and data. Must only be
+// called while the thread is not profiled (asserted below).
+void ThreadRegistrationLockedRWFromAnyThread::SetProfilingFeaturesAndData(
+    ThreadProfilingFeatures aProfilingFeatures,
+    ProfiledThreadData* aProfiledThreadData, const PSAutoLock&) {
+  MOZ_ASSERT(mProfilingFeatures == ThreadProfilingFeatures::NotProfiled);
+  mProfilingFeatures = aProfilingFeatures;
+
+  MOZ_ASSERT(!mProfiledThreadData);
+  MOZ_ASSERT(aProfiledThreadData);
+  mProfiledThreadData = aProfiledThreadData;
+
+  if (mJSContext) {
+    // The thread is now being profiled, and we already have a JSContext,
+    // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling.
+    MOZ_ASSERT(!mJsFrameBuffer);
+    mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  }
+
+  // Check invariants.
+  MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+             !!mProfiledThreadData);
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+// Marks the thread as no longer profiled and releases the JS frame buffer.
+void ThreadRegistrationLockedRWFromAnyThread::ClearProfilingFeaturesAndData(
+    const PSAutoLock&) {
+  mProfilingFeatures = ThreadProfilingFeatures::NotProfiled;
+  mProfiledThreadData = nullptr;
+
+  if (mJsFrameBuffer) {
+    delete[] mJsFrameBuffer;
+    mJsFrameBuffer = nullptr;
+  }
+
+  // Check invariants.
+  MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+             !!mProfiledThreadData);
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+// Attaches a JSContext to this thread's registration. There must not already
+// be one (asserted below).
+void ThreadRegistrationLockedRWOnThread::SetJSContext(JSContext* aJSContext) {
+  MOZ_ASSERT(aJSContext && !mJSContext);
+
+  mJSContext = aJSContext;
+
+  if (mProfiledThreadData) {
+    MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+               !!mProfiledThreadData);
+    // We now have a JSContext, and the thread is already being profiled,
+    // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling.
+    MOZ_ASSERT(!mJsFrameBuffer);
+    mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  }
+
+  // We give the JS engine a non-owning reference to the ProfilingStack. It's
+  // important that the JS engine doesn't touch this once the thread dies.
+  js::SetContextProfilingStack(aJSContext, &ProfilingStackRef());
+
+  // Check invariants.
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+// Detaches the JSContext and releases the JS frame buffer.
+void ThreadRegistrationLockedRWOnThread::ClearJSContext() {
+  mJSContext = nullptr;
+
+  if (mJsFrameBuffer) {
+    delete[] mJsFrameBuffer;
+    mJsFrameBuffer = nullptr;
+  }
+
+  // Check invariants.
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+// Applies a pending ACTIVE_REQUESTED / INACTIVE_REQUESTED state to the
+// JSContext, if there is one.
+void ThreadRegistrationLockedRWOnThread::PollJSSampling() {
+  // We can't start/stop profiling until we have the thread's JSContext.
+  if (mJSContext) {
+    // It is possible for mJSSampling to go through the following sequences.
+    //
+    // - INACTIVE, ACTIVE_REQUESTED, INACTIVE_REQUESTED, INACTIVE
+    //
+    // - ACTIVE, INACTIVE_REQUESTED, ACTIVE_REQUESTED, ACTIVE
+    //
+    // Therefore, the if and else branches here aren't always interleaved.
+    // This is ok because the JS engine can handle that.
+    //
+    if (mJSSampling == ACTIVE_REQUESTED) {
+      mJSSampling = ACTIVE;
+      js::EnableContextProfilingStack(mJSContext, true);
+
+      if (JSAllocationsEnabled()) {
+        // TODO - This probability should not be hardcoded. See Bug 1547284.
+        JS::EnableRecordingAllocations(mJSContext,
+                                       profiler_add_js_allocation_marker, 0.01);
+      }
+      js::RegisterContextProfilingEventMarker(mJSContext,
+                                              profiler_add_js_marker);
+
+    } else if (mJSSampling == INACTIVE_REQUESTED) {
+      mJSSampling = INACTIVE;
+      js::EnableContextProfilingStack(mJSContext, false);
+
+      if (JSAllocationsEnabled()) {
+        JS::DisableRecordingAllocations(mJSContext);
+      }
+    }
+  }
+}
+
+#ifdef NIGHTLY_BUILD
+// Reports the CPU time and wake-ups accumulated since the previous call, both
+// to Glean and as a "Thread CPU use" marker, then remembers what was reported.
+void ThreadRegistrationUnlockedConstReaderAndAtomicRW::RecordWakeCount() const {
+  baseprofiler::detail::BaseProfilerAutoLock lock(mRecordWakeCountMutex);
+
+  uint64_t newWakeCount = mWakeCount - mAlreadyRecordedWakeCount;
+  if (newWakeCount == 0 && mSleep != AWAKE) {
+    // If no new wake-up was counted, and the thread is not marked awake,
+    // we can be pretty sure there is no CPU activity to record.
+    // Threads that are never annotated as asleep/awake (typically rust threads)
+    // start as awake.
+    return;
+  }
+
+  uint64_t cpuTimeNs;
+  if (!GetCpuTimeSinceThreadStartInNs(&cpuTimeNs, PlatformDataCRef())) {
+    cpuTimeNs = 0;
+  }
+
+  constexpr uint64_t NS_PER_MS = 1'000'000;
+  uint64_t cpuTimeMs = cpuTimeNs / NS_PER_MS;
+
+  uint64_t newCpuTimeMs = MOZ_LIKELY(cpuTimeMs > mAlreadyRecordedCpuTimeInMs)
+                              ? cpuTimeMs - mAlreadyRecordedCpuTimeInMs
+                              : 0;
+
+  if (!newWakeCount && !newCpuTimeMs) {
+    // Nothing to report, avoid computing the Glean friendly thread name.
+    return;
+  }
+
+  nsAutoCString threadName(mInfo.Name());
+  // Trim the trailing number of threads that are part of a thread pool.
+  // Scanning from the end, the name is cut right after the last character
+  // that is not a digit, '#' or ' '.
+  for (size_t length = threadName.Length(); length > 0; --length) {
+    const char c = threadName.CharAt(length - 1);
+    if ((c < '0' || c > '9') && c != '#' && c != ' ') {
+      if (length != threadName.Length()) {
+        threadName.SetLength(length);
+      }
+      break;
+    }
+  }
+
+  mozilla::glean::RecordThreadCpuUse(threadName, newCpuTimeMs, newWakeCount);
+
+  // The thread id is provided as part of the payload because this call is
+  // inside a ThreadRegistration data function, which could be invoked with
+  // the ThreadRegistry locked. We cannot call any function/option that could
+  // attempt to lock the ThreadRegistry again, like MarkerThreadId.
+  PROFILER_MARKER("Thread CPU use", OTHER, {}, ThreadCpuUseMarker,
+                  mInfo.ThreadId(), newCpuTimeMs, newWakeCount, threadName);
+  mAlreadyRecordedCpuTimeInMs = cpuTimeMs;
+  mAlreadyRecordedWakeCount += newWakeCount;
+}
+#endif
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/ProfilerThreadRegistry.cpp b/tools/profiler/core/ProfilerThreadRegistry.cpp
new file mode 100644
index 0000000000..cb456471d9
--- /dev/null
+++ b/tools/profiler/core/ProfilerThreadRegistry.cpp
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfilerThreadRegistry.h"
+
+namespace mozilla::profiler {
+
+/* static */
+ThreadRegistry::RegistryContainer ThreadRegistry::sRegistryContainer;
+
+/* static */
+ThreadRegistry::RegistryMutex ThreadRegistry::sRegistryMutex;
+
+#if !defined(MOZ_GECKO_PROFILER)
+// When MOZ_GECKO_PROFILER is not defined, the function definitions in
+// platform.cpp are not built, causing link errors. So we keep these simple
+// definitions here.
+
+// Appends the registering thread to the registry, under the registry lock.
+// Failure to append is fatal.
+/* static */
+void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) {
+  LockedRegistry registryLock;
+  MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef}));
+}
+
+// Removes the first registry entry that points at the given thread's
+// registration, under the registry lock. Does nothing if there is none.
+/* static */
+void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) {
+  LockedRegistry registryLock;
+  for (OffThreadRef& entry : sRegistryContainer) {
+    if (!entry.IsPointingAt(*aOnThreadRef.mThreadRegistration)) {
+      continue;
+    }
+    // Found it; stop right away, the container was just modified.
+    sRegistryContainer.erase(&entry);
+    return;
+  }
+}
+#endif  // !defined(MOZ_GECKO_PROFILER)
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/ProfilerUtils.cpp b/tools/profiler/core/ProfilerUtils.cpp
new file mode 100644
index 0000000000..6a46878ad7
--- /dev/null
+++ b/tools/profiler/core/ProfilerUtils.cpp
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file implements functions from ProfilerUtils.h on all platforms.
+// Functions with platform-specific implementations are separated in #if blocks
+// below, with each block being self-contained with all the #includes and
+// definitions it needs, to keep platform code easier to maintain in isolation.
+
+#include "mozilla/ProfilerUtils.h"
+
+// --------------------------------------------- Windows process & thread ids
+#if defined(XP_WIN)
+
+// _getpid() and GetCurrentThreadId() respectively.
+#  include <process.h>
+#  include <processthreadsapi.h>
+
+ProfilerProcessId profiler_current_process_id() {
+  return ProfilerProcessId::FromNativeId(_getpid());
+}
+
+ProfilerThreadId profiler_current_thread_id() {
+  static_assert(std::is_same_v<ProfilerThreadId::NativeType,
+                               decltype(GetCurrentThreadId())>,
+                "ProfilerThreadId::NativeType must be exactly the type "
+                "returned by GetCurrentThreadId()");
+  return ProfilerThreadId::FromNativeId(GetCurrentThreadId());
+}
+
+// --------------------------------------------- Non-Windows process id
+#else
+// All non-Windows platforms are assumed to be POSIX, which has getpid().
+
+#  include <unistd.h>
+
+ProfilerProcessId profiler_current_process_id() {
+  return ProfilerProcessId::FromNativeId(getpid());
+}
+
+// --------------------------------------------- Non-Windows thread id
+// ------------------------------------------------------- macOS
+#  if defined(XP_MACOSX)
+
+#    include <pthread.h>
+
+// Returns an unspecified (default-constructed) id if the system call fails.
+ProfilerThreadId profiler_current_thread_id() {
+  uint64_t tid;
+  if (pthread_threadid_np(nullptr, &tid) != 0) {
+    return ProfilerThreadId{};
+  }
+  return ProfilerThreadId::FromNativeId(tid);
+}
+
+// ------------------------------------------------------- Android
+// Test Android before Linux, because Linux includes Android.
+#  elif defined(__ANDROID__) || defined(ANDROID)
+
+ProfilerThreadId profiler_current_thread_id() {
+  return ProfilerThreadId::FromNativeId(gettid());
+}
+
+// ------------------------------------------------------- Linux
+#  elif defined(XP_LINUX)
+
+#    include <sys/syscall.h>
+
+ProfilerThreadId profiler_current_thread_id() {
+  // glibc doesn't provide a wrapper for gettid() until 2.30
+  return ProfilerThreadId::FromNativeId(syscall(SYS_gettid));
+}
+
+// ------------------------------------------------------- FreeBSD
+#  elif defined(XP_FREEBSD)
+
+#    include <sys/thr.h>
+
+// Returns an unspecified (default-constructed) id if the system call fails.
+ProfilerThreadId profiler_current_thread_id() {
+  long id;
+  if (thr_self(&id) != 0) {
+    return ProfilerThreadId{};
+  }
+  return ProfilerThreadId::FromNativeId(id);
+}
+
+// ------------------------------------------------------- Others
+#  else
+
+ProfilerThreadId profiler_current_thread_id() {
+  return ProfilerThreadId::FromNativeId(std::this_thread::get_id());
+}
+
+#  endif
+#endif  // End of non-XP_WIN.
+
+// --------------------------------------------- Platform-agnostic definitions
+
+#include "MainThreadUtils.h"
+#include "mozilla/Assertions.h"
+
+// Unspecified until profiler_init_main_thread_id() has run.
+static ProfilerThreadId scProfilerMainThreadId;
+
+// Records the calling thread as the main thread, the first time it is called.
+void profiler_init_main_thread_id() {
+  MOZ_ASSERT(NS_IsMainThread());
+  mozilla::baseprofiler::profiler_init_main_thread_id();
+  if (!scProfilerMainThreadId.IsSpecified()) {
+    scProfilerMainThreadId = profiler_current_thread_id();
+  }
+}
+
+[[nodiscard]] ProfilerThreadId profiler_main_thread_id() {
+  return scProfilerMainThreadId;
+}
+
+[[nodiscard]] bool profiler_is_main_thread() {
+  return profiler_current_thread_id() == scProfilerMainThreadId;
+}
diff --git a/tools/profiler/core/VTuneProfiler.cpp b/tools/profiler/core/VTuneProfiler.cpp
new file mode 100644
index 0000000000..58a39c51ee
--- /dev/null
+++ b/tools/profiler/core/VTuneProfiler.cpp
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+#  undef UNICODE
+#  undef _UNICODE
+#endif
+
+#include "VTuneProfiler.h"
+#include "mozilla/Bootstrap.h"
+// NOTE(review): the name of this system header was missing from the reviewed
+// text; <string.h> is used because strlen() is called below - confirm.
+#include <string.h>
+
+VTuneProfiler* VTuneProfiler::mInstance = nullptr;
+
+void VTuneProfiler::Initialize() {
+  // This is just a 'dirty trick' to find out if the ittnotify DLL was found.
+  // If it wasn't this function always returns 0, otherwise it returns
+  // incrementing numbers, if the library was found this wastes 2 events but
+  // that should be okay.
+  __itt_event testEvent =
+      __itt_event_create("Test event", strlen("Test event"));
+  testEvent = __itt_event_create("Test event 2", strlen("Test event 2"));
+
+  if (testEvent) {
+    mInstance = new VTuneProfiler();
+  }
+}
+
+void VTuneProfiler::Shutdown() {}
+
+// Starts or ends the VTune event named aName. The event handle is created on
+// first use and cached in mStrings.
+void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) {
+  std::string str(aName);
+
+  auto iter = mStrings.find(str);
+
+  __itt_event event;
+  if (iter != mStrings.end()) {
+    event = iter->second;
+  } else {
+    event = __itt_event_create(aName, str.length());
+    mStrings.insert({str, event});
+  }
+
+  if (aKind == TRACING_INTERVAL_START || aKind == TRACING_EVENT) {
+    // VTune will consider starts not matched with an end to be single point in
+    // time events.
+    __itt_event_start(event);
+  } else {
+    __itt_event_end(event);
+  }
+}
+
+// Names the current thread for VTune. A thread called "GeckoMain" is named
+// after the process type instead of its own name.
+void VTuneProfiler::RegisterThreadInternal(const char* aName) {
+  std::string str(aName);
+
+  if (!str.compare("GeckoMain")) {
+    // Process main thread.
+    switch (XRE_GetProcessType()) {
+      case GeckoProcessType::GeckoProcessType_Default:
+        __itt_thread_set_name("Main Process");
+        break;
+      case GeckoProcessType::GeckoProcessType_Content:
+        __itt_thread_set_name("Content Process");
+        break;
+      case GeckoProcessType::GeckoProcessType_GMPlugin:
+        __itt_thread_set_name("Plugin Process");
+        break;
+      case GeckoProcessType::GeckoProcessType_GPU:
+        __itt_thread_set_name("GPU Process");
+        break;
+      default:
+        __itt_thread_set_name("Unknown Process");
+    }
+    return;
+  }
+  __itt_thread_set_name(aName);
+}
diff --git a/tools/profiler/core/VTuneProfiler.h b/tools/profiler/core/VTuneProfiler.h
new file mode 100644
index 0000000000..e3abe6b90d
--- /dev/null
+++ b/tools/profiler/core/VTuneProfiler.h
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VTuneProfiler_h
+#define VTuneProfiler_h
+
+// The intent here is to add 0 overhead for regular users. In order to build
+// the VTune profiler code at all --enable-vtune-instrumentation needs to be
+// set as a build option. Even then, when none of the environment variables
+// is specified that allow us to find the ittnotify DLL, these functions
+// should be minimal overhead. When starting Firefox under VTune, these
+// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64
+// should be set to point at the ittnotify DLL.
+#ifndef MOZ_VTUNE_INSTRUMENTATION
+
+#  define VTUNE_INIT()
+#  define VTUNE_SHUTDOWN()
+
+#  define VTUNE_TRACING(name, kind)
+#  define VTUNE_REGISTER_THREAD(name)
+
+#else
+
+#  include "GeckoProfiler.h"
+
+// This is the regular Intel header, these functions are actually defined for
+// us inside js/src/vtune by an intel C file which actually dynamically resolves
+// them to the correct DLL. Through libxul these will 'magically' resolve.
+#  include "vtune/ittnotify.h"
+
+// NOTE(review): the three header names below were missing from the reviewed
+// text. <string> and <unordered_map> are required by mStrings; <stdio.h> is a
+// reconstruction - confirm against upstream.
+#  include <stdio.h>
+#  include <string>
+#  include <unordered_map>
+
+// Thin static facade over the ittnotify event API. All entry points are no-ops
+// until Initialize() has successfully created the instance.
+class VTuneProfiler {
+ public:
+  static void Initialize();
+  static void Shutdown();
+
+  enum TracingKind {
+    TRACING_EVENT,
+    TRACING_INTERVAL_START,
+    TRACING_INTERVAL_END,
+  };
+
+  static void Trace(const char* aName, TracingKind aKind) {
+    if (mInstance) {
+      mInstance->TraceInternal(aName, aKind);
+    }
+  }
+  static void RegisterThread(const char* aName) {
+    if (mInstance) {
+      mInstance->RegisterThreadInternal(aName);
+    }
+  }
+
+ private:
+  void TraceInternal(const char* aName, TracingKind aKind);
+  void RegisterThreadInternal(const char* aName);
+
+  // This is null when the ittnotify DLL could not be found.
+  static VTuneProfiler* mInstance;
+
+  // Event handles already created, keyed by event name.
+  std::unordered_map<std::string, __itt_event> mStrings;
+};
+
+#  define VTUNE_INIT() VTuneProfiler::Initialize()
+#  define VTUNE_SHUTDOWN() VTuneProfiler::Shutdown()
+
+#  define VTUNE_TRACING(name, kind) VTuneProfiler::Trace(name, kind)
+#  define VTUNE_REGISTER_THREAD(name) VTuneProfiler::RegisterThread(name)
+
+#endif
+
+#endif /* VTuneProfiler_h */
diff --git a/tools/profiler/core/memory_hooks.cpp b/tools/profiler/core/memory_hooks.cpp
new file mode 100644
index 0000000000..59e87d607c
--- /dev/null
+++ b/tools/profiler/core/memory_hooks.cpp
@@ -0,0 +1,632 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "memory_hooks.h"
+
+#include "nscore.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/FastBernoulliTrial.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/JSONWriter.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/PlatformMutex.h"
+#include "mozilla/ProfilerCounts.h"
+#include "mozilla/ThreadLocal.h"
+
+#include "GeckoProfiler.h"
+#include "prenv.h"
+#include "replace_malloc.h"
+
+// NOTE(review): the names of all system headers from here to the end of the
+// ANDROID block were missing from the reviewed text and have been
+// reconstructed - confirm against upstream.
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef XP_WIN
+#  include <windows.h>
+#  include <process.h>
+#else
+#  include <pthread.h>
+#  include <sys/types.h>
+#  include <unistd.h>
+#endif
+
+#ifdef ANDROID
+#  include <android/log.h>
+#endif
+
+// The counters start out as a nullptr, and then get initialized only once. They
+// are never destroyed, as it would cause race conditions for the memory hooks
+// that use the counters. This helps guard against potentially expensive
+// operations like using a mutex.
+//
+// In addition, this is a raw pointer and not a UniquePtr, as the counter
+// machinery will try and de-register itself from the profiler. This could
+// happen after the profiler and its PSMutex was already destroyed, resulting in
+// a crash.
+static ProfilerCounterTotal* sCounter;
+
+// The gBernoulli value starts out as a nullptr, and only gets initialized once.
+// It then lives for the entire lifetime of the process. It cannot be deleted
+// without additional multi-threaded protections, since if we deleted it during
+// profiler_stop then there could be a race between threads already in a
+// memory hook that might try to access the value after or during deletion.
+static mozilla::FastBernoulliTrial* gBernoulli;
+
+namespace mozilla::profiler {
+
+//---------------------------------------------------------------------------
+// Utilities
+//---------------------------------------------------------------------------
+
+// Returns true or false depending on whether the marker was actually added
+// or not.
+// The marker is always sent to the main thread; the id of the allocating
+// thread travels in the payload.
+static bool profiler_add_native_allocation_marker(int64_t aSize,
+                                                  uintptr_t aMemoryAddress) {
+  if (!profiler_thread_is_being_profiled_for_markers(
+          profiler_main_thread_id())) {
+    return false;
+  }
+
+  // Because native allocations may be intercepted anywhere, blocking while
+  // locking the profiler mutex here could end up causing a deadlock if another
+  // mutex is taken, which the profiler may indirectly need elsewhere.
+  // See bug 1642726 for such a scenario.
+  // So instead we bail out if the mutex is already locked. Native allocations
+  // are statistically sampled anyway, so missing a few because of this is
+  // acceptable.
+  if (profiler_is_locked_on_current_thread()) {
+    return false;
+  }
+
+  struct NativeAllocationMarker {
+    static constexpr mozilla::Span<const char> MarkerTypeName() {
+      return mozilla::MakeStringSpan("Native allocation");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int64_t aSize,
+        uintptr_t aMemoryAddress, ProfilerThreadId aThreadId) {
+      aWriter.IntProperty("size", aSize);
+      aWriter.IntProperty("memoryAddress",
+                          static_cast<int64_t>(aMemoryAddress));
+      // Tech note: If `ToNumber()` returns a uint64_t, the conversion to
+      // int64_t is "implementation-defined" before C++20. This is acceptable
+      // here, because this is a one-way conversion to a unique identifier
+      // that's used to visually separate data by thread on the front-end.
+      aWriter.IntProperty("threadId",
+                          static_cast<int64_t>(aThreadId.ToNumber()));
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+
+  profiler_add_marker("Native allocation", geckoprofiler::category::OTHER,
+                      {MarkerThreadId::MainThread(), MarkerStack::Capture()},
+                      NativeAllocationMarker{}, aSize, aMemoryAddress,
+                      profiler_current_thread_id());
+  return true;
+}
+
+static malloc_table_t gMallocTable;
+
+// This is only needed because of the |const void*| vs |void*| arg mismatch.
+static size_t MallocSizeOf(const void* aPtr) {
+  return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr));
+}
+
+// The values for the Bernoulli trial are taken from DMD. According to DMD:
+//
+//   In testing, a probability of 0.003 resulted in ~25% of heap blocks getting
+//   a stack trace and ~80% of heap bytes getting a stack trace. (This is
+//   possible because big heap blocks are more likely to get a stack trace.)
+//
+//   The random number seeds are arbitrary and were obtained from random.org.
+//
+// However this value resulted in a lot of slowdown since the profiler stacks
+// are pretty heavy to collect. The value was lowered to 10% of the original to
+// 0.0003.
+static void EnsureBernoulliIsInstalled() {
+  if (!gBernoulli) {
+    // This is only installed once. See the gBernoulli definition for more
+    // information.
+    gBernoulli =
+        new FastBernoulliTrial(0.0003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0);
+  }
+}
+
+// This class provides infallible allocations (they abort on OOM) like
+// mozalloc's InfallibleAllocPolicy, except that memory hooks are bypassed. This
+// policy is used by the HashSet.
+class InfallibleAllocWithoutHooksPolicy {
+  // Crashes when given a null pointer, i.e. when an allocation failed.
+  static void ExitOnFailure(const void* aP) {
+    if (!aP) {
+      MOZ_CRASH("Profiler memory hooks out of memory; aborting");
+    }
+  }
+
+ public:
+  // The maybe_* variants may return nullptr; they call straight into
+  // gMallocTable. Sizes are element counts, not bytes.
+  template <typename T>
+  static T* maybe_pod_malloc(size_t aNumElems) {
+    // Refuse requests whose byte size would overflow size_t.
+    if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+    return (T*)gMallocTable.malloc(aNumElems * sizeof(T));
+  }
+
+  template <typename T>
+  static T* maybe_pod_calloc(size_t aNumElems) {
+    return (T*)gMallocTable.calloc(aNumElems, sizeof(T));
+  }
+
+  template <typename T>
+  static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    // Refuse requests whose byte size would overflow size_t.
+    if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+    return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T));
+  }
+
+  // The pod_* variants never return nullptr: they crash on failure.
+  template <typename T>
+  static T* pod_malloc(size_t aNumElems) {
+    T* p = maybe_pod_malloc<T>(aNumElems);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  template <typename T>
+  static T* pod_calloc(size_t aNumElems) {
+    T* p = maybe_pod_calloc<T>(aNumElems);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  template <typename T>
+  static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  template <typename T>
+  static void free_(T* aPtr, size_t aSize = 0) {
+    gMallocTable.free(aPtr);
+  }
+
+  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
+  bool checkSimulatedOOM() const { return true; }
+};
+
+// We can't use mozilla::Mutex because it causes re-entry into the memory hooks.
+// Define a custom implementation here.
+class Mutex : private ::mozilla::detail::MutexImpl {
+ public:
+  Mutex() : ::mozilla::detail::MutexImpl() {}
+
+  void Lock() { ::mozilla::detail::MutexImpl::lock(); }
+  void Unlock() { ::mozilla::detail::MutexImpl::unlock(); }
+};
+
+// Scoped lock for the Mutex above: locks on construction, unlocks on
+// destruction.
+class MutexAutoLock {
+  MutexAutoLock(const MutexAutoLock&) = delete;
+  void operator=(const MutexAutoLock&) = delete;
+
+  Mutex& mMutex;
+
+ public:
+  explicit MutexAutoLock(Mutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); }
+  ~MutexAutoLock() { mMutex.Unlock(); }
+};
+
+//---------------------------------------------------------------------------
+// Tracked allocations
+//---------------------------------------------------------------------------
+
+// The allocation tracker is shared between multiple threads, and is the
+// coordinator for knowing when allocations have been tracked. The mutable
+// internal state is protected by a mutex, and managed by the methods.
+//
+// The tracker knows about all the allocations that we have added to the
+// profiler. This way, whenever any given piece of memory is freed, we can see
+// if it was previously tracked, and we can track its deallocation.
+
+class AllocationTracker {
+  // This type tracks all of the allocations that we have captured. This way, we
+  // can see if a deallocation is inside of this set. We want to provide a
+  // balanced view into the allocations and deallocations.
+  typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>,
+                           InfallibleAllocWithoutHooksPolicy>
+      AllocationSet;
+
+ public:
+  AllocationTracker() : mAllocations(), mMutex() {}
+
+  void AddMemoryAddress(const void* memoryAddress) {
+    MutexAutoLock lock(mMutex);
+    if (!mAllocations.put(memoryAddress)) {
+      MOZ_CRASH("Out of memory while tracking native allocations.");
+    }
+  }
+
+  void Reset() {
+    MutexAutoLock lock(mMutex);
+    mAllocations.clearAndCompact();
+  }
+
+  // Returns true when the memory address is found and removed, otherwise that
+  // memory address is not being tracked and it returns false.
+  bool RemoveMemoryAddressIfFound(const void* memoryAddress) {
+    MutexAutoLock lock(mMutex);
+
+    auto ptr = mAllocations.lookup(memoryAddress);
+    if (ptr) {
+      // The memory was present. It no longer needs to be tracked.
+      mAllocations.remove(ptr);
+      return true;
+    }
+
+    return false;
+  }
+
+ private:
+  AllocationSet mAllocations;
+  Mutex mMutex MOZ_UNANNOTATED;
+};
+
+static AllocationTracker* gAllocationTracker;
+
+static void EnsureAllocationTrackerIsInstalled() {
+  if (!gAllocationTracker) {
+    // This is only installed once.
+    gAllocationTracker = new AllocationTracker();
+  }
+}
+
+//---------------------------------------------------------------------------
+// Per-thread blocking of intercepts
+//---------------------------------------------------------------------------
+
+// On MacOS, and Linux the first __thread/thread_local access calls malloc,
+// which leads to an infinite loop. So we use pthread-based TLS instead, which
+// somehow doesn't have this problem.
+#if !defined(XP_DARWIN) && !defined(XP_LINUX)
+#  define PROFILER_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T)
+#else
+#  define PROFILER_THREAD_LOCAL(T) \
+    ::mozilla::detail::ThreadLocal<T, ::mozilla::detail::ThreadLocalKeyStorage>
+#endif
+
+// This class is used to determine if allocations on this thread should be
+// intercepted or not.
+// Creating a ThreadIntercept object on the stack will implicitly block nested
+// ones. There are other reasons to block: The feature is off, or we're inside a
+// profiler function that is locking a mutex.
+class MOZ_RAII ThreadIntercept {
+  // When set to true, malloc does not intercept additional allocations. This is
+  // needed because collecting stacks creates new allocations. When blocked,
+  // these allocations are then ignored by the memory hook.
+  static PROFILER_THREAD_LOCAL(bool) tlsIsBlocked;
+
+  // This is a quick flag to check and see if the allocations feature is enabled
+  // or disabled.
+  static mozilla::Atomic<bool, mozilla::Relaxed> sAllocationsFeatureEnabled;
+
+  // True if this ThreadIntercept has set tlsIsBlocked.
+  bool mIsBlockingTLS;
+
+  // True if interception is blocked for any reason.
+  bool mIsBlocked;
+
+ public:
+  static void Init() {
+    tlsIsBlocked.infallibleInit();
+    // infallibleInit should zero-initialize, which corresponds to `false`.
+    MOZ_ASSERT(!tlsIsBlocked.get());
+  }
+
+  ThreadIntercept() {
+    // If the allocation interception feature is enabled, and the TLS is not
+    // blocked yet, we will block the TLS now, and unblock on destruction.
+    mIsBlockingTLS = sAllocationsFeatureEnabled && !tlsIsBlocked.get();
+    if (mIsBlockingTLS) {
+      MOZ_ASSERT(!tlsIsBlocked.get());
+      tlsIsBlocked.set(true);
+      // Since this is the top-level ThreadIntercept, interceptions are not
+      // blocked unless the profiler itself holds a locked mutex, in which case
+      // we don't want to intercept allocations that originate from such a
+      // profiler call.
+      mIsBlocked = profiler_is_locked_on_current_thread();
+    } else {
+      // The feature is off, or the TLS was already blocked, then we block this
+      // interception.
+      mIsBlocked = true;
+    }
+  }
+
+  ~ThreadIntercept() {
+    if (mIsBlockingTLS) {
+      MOZ_ASSERT(tlsIsBlocked.get());
+      tlsIsBlocked.set(false);
+    }
+  }
+
+  // Is this ThreadIntercept effectively blocked? (Feature is off, or this
+  // ThreadIntercept is nested, or we're inside a locked-Profiler function.)
+  bool IsBlocked() const { return mIsBlocked; }
+
+  static void EnableAllocationFeature() { sAllocationsFeatureEnabled = true; }
+
+  static void DisableAllocationFeature() { sAllocationsFeatureEnabled = false; }
+};
+
+PROFILER_THREAD_LOCAL(bool) ThreadIntercept::tlsIsBlocked;
+
+mozilla::Atomic<bool, mozilla::Relaxed>
+    ThreadIntercept::sAllocationsFeatureEnabled(false);
+
+//---------------------------------------------------------------------------
+// malloc/free callbacks
+//---------------------------------------------------------------------------
+
+static void AllocCallback(void* aPtr, size_t aReqSize) {
+  if (!aPtr) {
+    return;
+  }
+
+  // The first part of this function does not allocate.
+ size_t actualSize = gMallocTable.malloc_usable_size(aPtr); + if (actualSize > 0) { + sCounter->Add(actualSize); + } + + ThreadIntercept threadIntercept; + if (threadIntercept.IsBlocked()) { + // Either the native allocations feature is not turned on, or we may be + // recursing into a memory hook, return. We'll still collect counter + // information about this allocation, but no stack. + return; + } + + AUTO_PROFILER_LABEL("AllocCallback", PROFILER); + + // Perform a bernoulli trial, which will return true or false based on its + // configured probability. It takes into account the byte size so that + // larger allocations are weighted heavier than smaller allocations. + MOZ_ASSERT(gBernoulli, + "gBernoulli must be properly installed for the memory hooks."); + if ( + // First perform the Bernoulli trial. + gBernoulli->trial(actualSize) && + // Second, attempt to add a marker if the Bernoulli trial passed. + profiler_add_native_allocation_marker( + static_cast(actualSize), + reinterpret_cast(aPtr))) { + MOZ_ASSERT(gAllocationTracker, + "gAllocationTracker must be properly installed for the memory " + "hooks."); + // Only track the memory if the allocation marker was actually added to the + // profiler. + gAllocationTracker->AddMemoryAddress(aPtr); + } + + // We're ignoring aReqSize here +} + +static void FreeCallback(void* aPtr) { + if (!aPtr) { + return; + } + + // The first part of this function does not allocate. + size_t unsignedSize = MallocSizeOf(aPtr); + int64_t signedSize = -(static_cast(unsignedSize)); + sCounter->Add(signedSize); + + ThreadIntercept threadIntercept; + if (threadIntercept.IsBlocked()) { + // Either the native allocations feature is not turned on, or we may be + // recursing into a memory hook, return. We'll still collect counter + // information about this allocation, but no stack. 
+ return; + } + + AUTO_PROFILER_LABEL("FreeCallback", PROFILER); + + // Perform a bernoulli trial, which will return true or false based on its + // configured probability. It takes into account the byte size so that + // larger allocations are weighted heavier than smaller allocations. + MOZ_ASSERT( + gAllocationTracker, + "gAllocationTracker must be properly installed for the memory hooks."); + if (gAllocationTracker->RemoveMemoryAddressIfFound(aPtr)) { + // This size here is negative, indicating a deallocation. + profiler_add_native_allocation_marker(signedSize, + reinterpret_cast(aPtr)); + } +} + +} // namespace mozilla::profiler + +//--------------------------------------------------------------------------- +// malloc/free interception +//--------------------------------------------------------------------------- + +using namespace mozilla::profiler; + +static void* replace_malloc(size_t aSize) { + // This must be a call to malloc from outside. Intercept it. + void* ptr = gMallocTable.malloc(aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void* replace_calloc(size_t aCount, size_t aSize) { + void* ptr = gMallocTable.calloc(aCount, aSize); + AllocCallback(ptr, aCount * aSize); + return ptr; +} + +static void* replace_realloc(void* aOldPtr, size_t aSize) { + // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. + if (!aOldPtr) { + return replace_malloc(aSize); + } + + FreeCallback(aOldPtr); + void* ptr = gMallocTable.realloc(aOldPtr, aSize); + if (ptr) { + AllocCallback(ptr, aSize); + } else { + // If realloc fails, we undo the prior operations by re-inserting the old + // pointer into the live block table. We don't have to do anything with the + // dead block list because the dead block hasn't yet been inserted. The + // block will end up looking like it was allocated for the first time here, + // which is untrue, and the slop bytes will be zero, which may be untrue. 
+ // But this case is rare and doing better isn't worth the effort. + AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr)); + } + return ptr; +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + void* ptr = gMallocTable.memalign(aAlignment, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void replace_free(void* aPtr) { + FreeCallback(aPtr); + gMallocTable.free(aPtr); +} + +static void* replace_moz_arena_malloc(arena_id_t aArena, size_t aSize) { + void* ptr = gMallocTable.moz_arena_malloc(aArena, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void* replace_moz_arena_calloc(arena_id_t aArena, size_t aCount, + size_t aSize) { + void* ptr = gMallocTable.moz_arena_calloc(aArena, aCount, aSize); + AllocCallback(ptr, aCount * aSize); + return ptr; +} + +static void* replace_moz_arena_realloc(arena_id_t aArena, void* aPtr, + size_t aSize) { + void* ptr = gMallocTable.moz_arena_realloc(aArena, aPtr, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void replace_moz_arena_free(arena_id_t aArena, void* aPtr) { + FreeCallback(aPtr); + gMallocTable.moz_arena_free(aArena, aPtr); +} + +static void* replace_moz_arena_memalign(arena_id_t aArena, size_t aAlignment, + size_t aSize) { + void* ptr = gMallocTable.moz_arena_memalign(aArena, aAlignment, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +// we have to replace these or jemalloc will assume we don't implement any +// of the arena replacements! 
+static arena_id_t replace_moz_create_arena_with_params( + arena_params_t* aParams) { + return gMallocTable.moz_create_arena_with_params(aParams); +} + +static void replace_moz_dispose_arena(arena_id_t aArenaId) { + return gMallocTable.moz_dispose_arena(aArenaId); +} + +static void replace_moz_set_max_dirty_page_modifier(int32_t aModifier) { + return gMallocTable.moz_set_max_dirty_page_modifier(aModifier); +} + +// Must come after all the replace_* funcs +void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) { + gMallocTable = *aMallocTable; +#define MALLOC_FUNCS (MALLOC_FUNCS_MALLOC_BASE | MALLOC_FUNCS_ARENA) +#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name; +#include "malloc_decls.h" +} + +void profiler_replace_remove() {} + +namespace mozilla::profiler { +//--------------------------------------------------------------------------- +// Initialization +//--------------------------------------------------------------------------- + +BaseProfilerCount* install_memory_hooks() { + if (!sCounter) { + sCounter = new ProfilerCounterTotal("malloc", "Memory", + "Amount of allocated memory"); + // Also initialize the ThreadIntercept, even if native allocation tracking + // won't be turned on. This way the TLS will be initialized. + ThreadIntercept::Init(); + } else { + sCounter->Clear(); + } + jemalloc_replace_dynamic(replace_init); + return sCounter; +} + +// Remove the hooks, but leave the sCounter machinery. Deleting the counter +// would race with any existing memory hooks that are currently running. Rather +// than adding overhead here of mutexes it's cheaper for the performance to just +// leak these values. +void remove_memory_hooks() { jemalloc_replace_dynamic(nullptr); } + +void enable_native_allocations() { + // The bloat log tracks allocations and deallocations. This can conflict + // with the memory hook machinery, as the bloat log creates its own + // allocations. 
This means we can re-enter inside the bloat log machinery. At + // this time, the bloat log does not know about cannot handle the native + // allocation feature. + // + // At the time of this writing, we hit this assertion: + // IsIdle(oldState) || IsRead(oldState) in Checker::StartReadOp() + // + // #01: GetBloatEntry(char const*, unsigned int) + // #02: NS_LogCtor + // #03: profiler_get_backtrace() + // #04: profiler_add_native_allocation_marker(long long) + // #05: mozilla::profiler::AllocCallback(void*, unsigned long) + // #06: replace_calloc(unsigned long, unsigned long) + // #07: PLDHashTable::ChangeTable(int) + // #08: PLDHashTable::Add(void const*, std::nothrow_t const&) + // #09: nsBaseHashtable, ... + // #10: GetBloatEntry(char const*, unsigned int) + // #11: NS_LogCtor + // #12: profiler_get_backtrace() + // ... + MOZ_ASSERT(!PR_GetEnv("XPCOM_MEM_BLOAT_LOG"), + "The bloat log feature is not compatible with the native " + "allocations instrumentation."); + + EnsureBernoulliIsInstalled(); + EnsureAllocationTrackerIsInstalled(); + ThreadIntercept::EnableAllocationFeature(); +} + +// This is safe to call even if native allocations hasn't been enabled. +void disable_native_allocations() { + ThreadIntercept::DisableAllocationFeature(); + if (gAllocationTracker) { + gAllocationTracker->Reset(); + } +} + +} // namespace mozilla::profiler diff --git a/tools/profiler/core/memory_hooks.h b/tools/profiler/core/memory_hooks.h new file mode 100644 index 0000000000..a6ace771dd --- /dev/null +++ b/tools/profiler/core/memory_hooks.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef memory_hooks_h +#define memory_hooks_h + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) +class BaseProfilerCount; + +namespace mozilla { +namespace profiler { + +BaseProfilerCount* install_memory_hooks(); +void remove_memory_hooks(); +void enable_native_allocations(); +void disable_native_allocations(); + +} // namespace profiler +} // namespace mozilla +#endif + +#endif diff --git a/tools/profiler/core/platform-linux-android.cpp b/tools/profiler/core/platform-linux-android.cpp new file mode 100644 index 0000000000..6bcb9cf38b --- /dev/null +++ b/tools/profiler/core/platform-linux-android.cpp @@ -0,0 +1,636 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// This file is used for both Linux and Android as well as FreeBSD. + +#include +#include + +#include +#if defined(GP_OS_freebsd) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +// Ubuntu Dapper requires memory pages to be marked as +// executable. Otherwise, OS raises an exception when executing code +// in that page. +#include // mmap & munmap +#include // mmap & munmap +#include // open +#include // open +#include // sysconf +#include +#ifdef __GLIBC__ +# include // backtrace, backtrace_symbols +#endif // def __GLIBC__ +#include // index +#include +#include + +#include "prenv.h" +#include "mozilla/PodOperations.h" +#include "mozilla/DebugOnly.h" +#if defined(GP_OS_linux) || defined(GP_OS_android) +# include "common/linux/breakpad_getcontext.h" +#endif + +#include +#include + +using namespace mozilla; + +static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) { + aRegs.mContext = aContext; + mcontext_t& mcontext = aContext->uc_mcontext; + + // Extracting the sample from the context is extremely machine dependent. +#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + aRegs.mPC = reinterpret_cast
(mcontext.gregs[REG_EIP]); + aRegs.mSP = reinterpret_cast
(mcontext.gregs[REG_ESP]); + aRegs.mFP = reinterpret_cast
(mcontext.gregs[REG_EBP]); + aRegs.mLR = 0; +#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + aRegs.mPC = reinterpret_cast
(mcontext.gregs[REG_RIP]); + aRegs.mSP = reinterpret_cast
(mcontext.gregs[REG_RSP]); + aRegs.mFP = reinterpret_cast
(mcontext.gregs[REG_RBP]); + aRegs.mLR = 0; +#elif defined(GP_PLAT_amd64_freebsd) + aRegs.mPC = reinterpret_cast
(mcontext.mc_rip); + aRegs.mSP = reinterpret_cast
(mcontext.mc_rsp); + aRegs.mFP = reinterpret_cast
(mcontext.mc_rbp); + aRegs.mLR = 0; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + aRegs.mPC = reinterpret_cast
(mcontext.arm_pc); + aRegs.mSP = reinterpret_cast
(mcontext.arm_sp); + aRegs.mFP = reinterpret_cast
(mcontext.arm_fp); + aRegs.mLR = reinterpret_cast
(mcontext.arm_lr); +#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + aRegs.mPC = reinterpret_cast
(mcontext.pc); + aRegs.mSP = reinterpret_cast
(mcontext.sp); + aRegs.mFP = reinterpret_cast
(mcontext.regs[29]); + aRegs.mLR = reinterpret_cast
(mcontext.regs[30]); +#elif defined(GP_PLAT_arm64_freebsd) + aRegs.mPC = reinterpret_cast
(mcontext.mc_gpregs.gp_elr); + aRegs.mSP = reinterpret_cast
(mcontext.mc_gpregs.gp_sp); + aRegs.mFP = reinterpret_cast
(mcontext.mc_gpregs.gp_x[29]); + aRegs.mLR = reinterpret_cast
(mcontext.mc_gpregs.gp_lr); +#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android) + aRegs.mPC = reinterpret_cast
(mcontext.pc); + aRegs.mSP = reinterpret_cast
(mcontext.gregs[29]); + aRegs.mFP = reinterpret_cast
(mcontext.gregs[30]); + +#else +# error "bad platform" +#endif +} + +#if defined(GP_OS_android) +# define SYS_tgkill __NR_tgkill +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) +int tgkill(pid_t tgid, pid_t tid, int signalno) { + return syscall(SYS_tgkill, tgid, tid, signalno); +} +#endif + +#if defined(GP_OS_freebsd) +# define tgkill thr_kill2 +#endif + +mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) { + MOZ_ASSERT(aThreadId == profiler_current_thread_id()); + if (clockid_t clockid; pthread_getcpuclockid(pthread_self(), &clockid) == 0) { + mClockId = Some(clockid); + } +} + +mozilla::profiler::PlatformData::~PlatformData() = default; + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +// The only way to reliably interrupt a Linux thread and inspect its register +// and stack state is by sending a signal to it, and doing the work inside the +// signal handler. But we don't want to run much code inside the signal +// handler, since POSIX severely restricts what we can do in signal handlers. +// So we use a system of semaphores to suspend the thread and allow the +// sampler thread to do all the work of unwinding and copying out whatever +// data it wants. +// +// A four-message protocol is used to reliably suspend and later resume the +// thread to be sampled (the samplee): +// +// Sampler (signal sender) thread Samplee (thread to be sampled) +// +// Prepare the SigHandlerCoordinator +// and point sSigHandlerCoordinator at it +// +// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler) +// wait(mMessage2) Copy register state +// into sSigHandlerCoordinator +// <------ MSG 2 ----- post(mMessage2) +// Samplee is now suspended. wait(mMessage3) +// Examine its stack/register +// state at leisure +// +// Release samplee: +// post(mMessage3) ------- MSG 3 -----> +// wait(mMessage4) Samplee now resumes. Tell +// the sampler that we are done. 
+// <------ MSG 4 ------ post(mMessage4) +// Now we know the samplee's signal (leave signal handler) +// handler has finished using +// sSigHandlerCoordinator. We can +// safely reuse it for some other thread. +// + +// A type used to coordinate between the sampler (signal sending) thread and +// the thread currently being sampled (the samplee, which receives the +// signals). +// +// The first message is sent using a SIGPROF signal delivery. The subsequent +// three are sent using sem_wait/sem_post pairs. They are named accordingly +// in the following struct. +struct SigHandlerCoordinator { + SigHandlerCoordinator() { + PodZero(&mUContext); + int r = sem_init(&mMessage2, /* pshared */ 0, 0); + r |= sem_init(&mMessage3, /* pshared */ 0, 0); + r |= sem_init(&mMessage4, /* pshared */ 0, 0); + MOZ_ASSERT(r == 0); + (void)r; + } + + ~SigHandlerCoordinator() { + int r = sem_destroy(&mMessage2); + r |= sem_destroy(&mMessage3); + r |= sem_destroy(&mMessage4); + MOZ_ASSERT(r == 0); + (void)r; + } + + sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator" + sem_t mMessage3; // To samplee: "resume" + sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator" + ucontext_t mUContext; // Context at signal +}; + +struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr; + +static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) { + // Avoid TSan warning about clobbering errno. + int savedErrno = errno; + + MOZ_ASSERT(aSignal == SIGPROF); + MOZ_ASSERT(Sampler::sSigHandlerCoordinator); + + // By sending us this signal, the sampler thread has sent us message 1 in + // the comment above, with the meaning "|sSigHandlerCoordinator| is ready + // for use, please copy your register context into it." + Sampler::sSigHandlerCoordinator->mUContext = + *static_cast(aContext); + + // Send message 2: tell the sampler thread that the context has been copied + // into |sSigHandlerCoordinator->mUContext|. 
sem_post can never fail by + // being interrupted by a signal, so there's no loop around this call. + int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2); + MOZ_ASSERT(r == 0); + + // At this point, the sampler thread assumes we are suspended, so we must + // not touch any global state here. + + // Wait for message 3: the sampler thread tells us to resume. + while (true) { + r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3); + if (r == -1 && errno == EINTR) { + // Interrupted by a signal. Try again. + continue; + } + // We don't expect any other kind of failure + MOZ_ASSERT(r == 0); + break; + } + + // Send message 4: tell the sampler thread that we are finished accessing + // |sSigHandlerCoordinator|. After this point it is not safe to touch + // |sSigHandlerCoordinator|. + r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4); + MOZ_ASSERT(r == 0); + + errno = savedErrno; +} + +Sampler::Sampler(PSLockRef aLock) + : mMyPid(profiler_current_process_id()), + // We don't know what the sampler thread's ID will be until it runs, so + // set mSamplerTid to a dummy value and fill it in for real in + // SuspendAndSampleAndResumeThread(). + mSamplerTid{} { +#if defined(USE_EHABI_STACKWALK) + mozilla::EHABIStackWalkInit(); +#endif + + // NOTE: We don't initialize LUL here, instead initializing it in + // SamplerThread's constructor. This is because with the + // profiler_suspend_and_sample_thread entry point, we want to be able to + // sample without waiting for LUL to be initialized. + + // Request profiling signals. + struct sigaction sa; + sa.sa_sigaction = SigprofHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) { + MOZ_CRASH("Error installing SIGPROF handler in the profiler"); + } +} + +void Sampler::Disable(PSLockRef aLock) { + // Restore old signal handler. This is global state so it's important that + // we do it now, while gPSMutex is locked. 
+ sigaction(SIGPROF, &mOldSigprofHandler, 0); +} + +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter) { + aWriter.StringProperty("threadCPUDelta", "ns"); +} + +/* static */ +uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) { + return aRawValue; +} + +namespace mozilla::profiler { +bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) { + Maybe maybeCid = aPlatformData.GetClockId(); + if (MOZ_UNLIKELY(!maybeCid)) { + return false; + } + + timespec t; + if (clock_gettime(*maybeCid, &t) != 0) { + return false; + } + + *aResult = uint64_t(t.tv_sec) * 1'000'000'000u + uint64_t(t.tv_nsec); + return true; +} +} // namespace mozilla::profiler + +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) { + AUTO_PROFILER_STATS(GetProcessRunningTimes); + + RunningTimes newRunningTimes; + { + AUTO_PROFILER_STATS(GetProcessRunningTimes_clock_gettime); + if (timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0) { + newRunningTimes.SetThreadCPUDelta(uint64_t(ts.tv_sec) * 1'000'000'000u + + uint64_t(ts.tv_nsec)); + } + newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + }; + + const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated; + aPreviousRunningTimesToBeUpdated = newRunningTimes; + return diff; +} + +static RunningTimes GetThreadRunningTimesDiff( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(GetRunningTimes_clock_gettime_thread); + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + Maybe maybeCid = platformData.GetClockId(); + + if (MOZ_UNLIKELY(!maybeCid)) { + // No clock id -> Nothing to measure apart from the timestamp. 
+ RunningTimes emptyRunningTimes; + emptyRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + return emptyRunningTimes; + } + + const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp( + [cid = *maybeCid](RunningTimes& aRunningTimes) { + AUTO_PROFILER_STATS(GetRunningTimes_clock_gettime); + if (timespec ts; clock_gettime(cid, &ts) == 0) { + aRunningTimes.ResetThreadCPUDelta( + uint64_t(ts.tv_sec) * 1'000'000'000u + uint64_t(ts.tv_nsec)); + } else { + aRunningTimes.ClearThreadCPUDelta(); + } + }); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + const RunningTimes diff = newRunningTimes - previousRunningTimes; + previousRunningTimes = newRunningTimes; + return diff; +} + +static void DiscardSuspendedThreadRunningTimes( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes); + + // On Linux, suspending a thread uses a signal that makes that thread work + // to handle it. So we want to discard any added running time since the call + // to GetThreadRunningTimesDiff, which is done by overwriting the thread's + // PreviousThreadRunningTimesRef() with the current running time now. + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + Maybe maybeCid = platformData.GetClockId(); + + if (MOZ_UNLIKELY(!maybeCid)) { + // No clock id -> Nothing to measure. 
+ return; + } + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + + if (timespec ts; clock_gettime(*maybeCid, &ts) == 0) { + previousRunningTimes.ResetThreadCPUDelta( + uint64_t(ts.tv_sec) * 1'000'000'000u + uint64_t(ts.tv_nsec)); + } else { + previousRunningTimes.ClearThreadCPUDelta(); + } +} + +template +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs) { + // Only one sampler thread can be sampling at once. So we expect to have + // complete control over |sSigHandlerCoordinator|. + MOZ_ASSERT(!sSigHandlerCoordinator); + + if (!mSamplerTid.IsSpecified()) { + mSamplerTid = profiler_current_thread_id(); + } + ProfilerThreadId sampleeTid = aThreadData.Info().ThreadId(); + MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + SigHandlerCoordinator coord; // on sampler thread's stack + sSigHandlerCoordinator = &coord; + + // Send message 1 to the samplee (the thread to be sampled), by + // signalling at it. + // This could fail if the thread doesn't exist anymore. + int r = tgkill(mMyPid.ToNumber(), sampleeTid.ToNumber(), SIGPROF); + if (r == 0) { + // Wait for message 2 from the samplee, indicating that the context + // is available and that the thread is suspended. + while (true) { + r = sem_wait(&sSigHandlerCoordinator->mMessage2); + if (r == -1 && errno == EINTR) { + // Interrupted by a signal. Try again. + continue; + } + // We don't expect any other kind of failure. + MOZ_ASSERT(r == 0); + break; + } + + //----------------------------------------------------------------// + // Sample the target thread. 
+ + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. In the critical section, + // we must not do any dynamic memory allocation, nor try to acquire any lock + // or any other unshareable resource. This is because the thread to be + // sampled has been suspended at some entirely arbitrary point, and we have + // no idea which unsharable resources (locks, essentially) it holds. So any + // attempt to acquire any lock, including the implied locks used by the + // malloc implementation, risks deadlock. This includes TimeStamp::Now(), + // which gets a lock on Windows. + + // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is + // valid. We can poke around in it and unwind its stack as we like. + + // Extract the current register values. + Registers regs; + PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext); + aProcessRegs(regs, aNow); + + //----------------------------------------------------------------// + // Resume the target thread. + + // Send message 3 to the samplee, which tells it to resume. + r = sem_post(&sSigHandlerCoordinator->mMessage3); + MOZ_ASSERT(r == 0); + + // Wait for message 4 from the samplee, which tells us that it has + // finished with |sSigHandlerCoordinator|. + while (true) { + r = sem_wait(&sSigHandlerCoordinator->mMessage4); + if (r == -1 && errno == EINTR) { + continue; + } + MOZ_ASSERT(r == 0); + break; + } + + // The profiler's critical section ends here. After this point, none of the + // critical section limitations documented above apply. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + } + + // This isn't strictly necessary, but doing so does help pick up anomalies + // in which the signal handler is running when it shouldn't be. 
+ sSigHandlerCoordinator = nullptr; +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static void* ThreadEntry(void* aArg) { + auto thread = static_cast(aArg); + thread->Run(); + return nullptr; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) { +#if defined(USE_LUL_STACKWALK) + lul::LUL* lul = CorePS::Lul(); + if (!lul && ProfilerFeature::HasStackWalk(aFeatures)) { + CorePS::SetLul(MakeUnique(logging_sink_for_LUL)); + // Read all the unwind info currently available. + lul = CorePS::Lul(); + read_procmaps(lul); + + // Switch into unwind mode. After this point, we can't add or remove any + // unwind info to/from this LUL instance. The only thing we can do with + // it is Unwind() calls. + lul->EnableUnwinding(); + + // Has a test been requested? + if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) { + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, lul); + } + } +#endif + + // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending + // the signal ourselves instead of relying on itimer provides much better + // accuracy. + // + // At least 350 KiB of stack space are needed when built with TSAN. This + // includes lul::N_STACK_BYTES plus whatever else is needed for the sampler + // thread. Set the stack size to 800 KiB to keep a safe margin above that. 
+ pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0 || + pthread_attr_setstacksize(&attr, 800 * 1024) != 0 || + pthread_create(&mThread, &attr, ThreadEntry, this) != 0) { + MOZ_CRASH("pthread_create failed"); + } + pthread_attr_destroy(&attr); +} + +SamplerThread::~SamplerThread() { + pthread_join(mThread, nullptr); + // Just in the unlikely case some callbacks were added between the end of the + // thread and now. + InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), + SamplingState::JustStopped); +} + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + if (aMicroseconds >= 1000000) { + // Use usleep for larger intervals, because the nanosleep + // code below only supports intervals < 1 second. + MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds)); + return; + } + + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = aMicroseconds * 1000UL; + + int rv = ::nanosleep(&ts, &ts); + + while (rv != 0 && errno == EINTR) { + // Keep waiting in case of interrupt. + // nanosleep puts the remaining time back into ts. + rv = ::nanosleep(&ts, &ts); + } + + MOZ_ASSERT(!rv, "nanosleep call failed"); +} + +void SamplerThread::Stop(PSLockRef aLock) { + // Restore old signal handler. This is global state so it's important that + // we do it now, while gPSMutex is locked. It's safe to do this now even + // though this SamplerThread is still alive, because the next time the main + // loop of Run() iterates it won't get past the mActivityGeneration check, + // and so won't send any signals. + mSampler.Disable(aLock); +} + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +#if defined(GP_OS_linux) || defined(GP_OS_freebsd) + +// We use pthread_atfork() to temporarily disable signal delivery during any +// fork() call. Without that, fork() can be repeatedly interrupted by signal +// delivery, requiring it to be repeatedly restarted, which can lead to *long* +// delays. See bug 837390. 
+// +// We provide no paf_child() function to run in the child after forking. This +// is fine because we always immediately exec() after fork(), and exec() +// clobbers all process state. Also, we don't want the sampler to resume in the +// child process between fork() and exec(), it would be wasteful. +// +// Unfortunately all this is only doable on non-Android because Bionic doesn't +// have pthread_atfork. + +// In the parent, before the fork, increase gSkipSampling to ensure that +// profiler sampling loops will be skipped. There could be one in progress now, +// causing a small delay, but further sampling will be skipped, allowing `fork` +// to complete. +static void paf_prepare() { ++gSkipSampling; } + +// In the parent, after the fork, decrease gSkipSampling to let the sampler +// resume sampling (unless other places have made it non-zero as well). +static void paf_parent() { --gSkipSampling; } + +static void PlatformInit(PSLockRef aLock) { + // Set up the fork handlers. + pthread_atfork(paf_prepare, paf_parent, nullptr); +} + +#else + +static void PlatformInit(PSLockRef aLock) {} + +#endif + +#if defined(HAVE_NATIVE_UNWIND) +# define REGISTERS_SYNC_POPULATE(regs) \ + if (!getcontext(&regs.mContextSyncStorage)) { \ + PopulateRegsFromContext(regs, &regs.mContextSyncStorage); \ + } +#endif diff --git a/tools/profiler/core/platform-macos.cpp b/tools/profiler/core/platform-macos.cpp new file mode 100644 index 0000000000..b69a346d64 --- /dev/null +++ b/tools/profiler/core/platform-macos.cpp @@ -0,0 +1,297 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// this port is based off of v8 svn revision 9837 + +mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) + : mProfiledThread(mach_thread_self()) {} + +mozilla::profiler::PlatformData::~PlatformData() { + // Deallocate Mach port for thread. + mach_port_deallocate(mach_task_self(), mProfiledThread); +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +Sampler::Sampler(PSLockRef aLock) {} + +void Sampler::Disable(PSLockRef aLock) {} + +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter) { + // Microseconds. + aWriter.StringProperty("threadCPUDelta", "\u00B5s"); +} + +/* static */ +uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) { + return aRawValue; +} + +namespace mozilla::profiler { +bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) { + thread_extended_info_data_t threadInfoData; + mach_msg_type_number_t count = THREAD_EXTENDED_INFO_COUNT; + if (thread_info(aPlatformData.ProfiledThread(), THREAD_EXTENDED_INFO, + (thread_info_t)&threadInfoData, &count) != KERN_SUCCESS) { + return false; + } + + *aResult = threadInfoData.pth_user_time + threadInfoData.pth_system_time; + return true; +} +} // namespace mozilla::profiler + +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) { + AUTO_PROFILER_STATS(GetProcessRunningTimes); + + RunningTimes newRunningTimes; + { + AUTO_PROFILER_STATS(GetProcessRunningTimes_task_info); + + static const auto pid = getpid(); + struct proc_taskinfo pti; + if ((unsigned long)proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &pti, + 
PROC_PIDTASKINFO_SIZE) >= + PROC_PIDTASKINFO_SIZE) { + newRunningTimes.SetThreadCPUDelta(pti.pti_total_user + + pti.pti_total_system); + } + newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + }; + + const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated; + aPreviousRunningTimesToBeUpdated = newRunningTimes; + return diff; +} + +static RunningTimes GetThreadRunningTimesDiff( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(GetRunningTimes); + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + + const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp( + [&platformData](RunningTimes& aRunningTimes) { + AUTO_PROFILER_STATS(GetRunningTimes_thread_info); + thread_basic_info_data_t threadBasicInfo; + mach_msg_type_number_t basicCount = THREAD_BASIC_INFO_COUNT; + if (thread_info(platformData.ProfiledThread(), THREAD_BASIC_INFO, + reinterpret_cast(&threadBasicInfo), + &basicCount) == KERN_SUCCESS && + basicCount == THREAD_BASIC_INFO_COUNT) { + uint64_t userTimeUs = + uint64_t(threadBasicInfo.user_time.seconds) * + uint64_t(USEC_PER_SEC) + + uint64_t(threadBasicInfo.user_time.microseconds); + uint64_t systemTimeUs = + uint64_t(threadBasicInfo.system_time.seconds) * + uint64_t(USEC_PER_SEC) + + uint64_t(threadBasicInfo.system_time.microseconds); + aRunningTimes.ResetThreadCPUDelta(userTimeUs + systemTimeUs); + } else { + aRunningTimes.ClearThreadCPUDelta(); + } + }); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + const RunningTimes diff = newRunningTimes - previousRunningTimes; + previousRunningTimes = newRunningTimes; + return diff; +} + +static void DiscardSuspendedThreadRunningTimes( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& 
aThreadData) { + // Nothing to do! + // On macOS, suspending a thread doesn't make that thread work. +} + +template +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs) { + thread_act_t samplee_thread = aThreadData.PlatformDataCRef().ProfiledThread(); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + // We're using thread_suspend on OS X because pthread_kill (which is what we + // at one time used on Linux) has less consistent performance and causes + // strange crashes, see bug 1166778 and bug 1166808. thread_suspend + // is also just a lot simpler to use. + + if (KERN_SUCCESS != thread_suspend(samplee_thread)) { + return; + } + + //----------------------------------------------------------------// + // Sample the target thread. + + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. We must be very careful + // what we do here, or risk deadlock. See the corresponding comment in + // platform-linux-android.cpp for details. 
+ +#if defined(__x86_64__) + thread_state_flavor_t flavor = x86_THREAD_STATE64; + x86_thread_state64_t state; + mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; +# if __DARWIN_UNIX03 +# define REGISTER_FIELD(name) __r##name +# else +# define REGISTER_FIELD(name) r##name +# endif // __DARWIN_UNIX03 +#elif defined(__aarch64__) + thread_state_flavor_t flavor = ARM_THREAD_STATE64; + arm_thread_state64_t state; + mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT; +# if __DARWIN_UNIX03 +# define REGISTER_FIELD(name) __##name +# else +# define REGISTER_FIELD(name) name +# endif // __DARWIN_UNIX03 +#else +# error "unknown architecture" +#endif + + if (thread_get_state(samplee_thread, flavor, + reinterpret_cast(&state), + &count) == KERN_SUCCESS) { + Registers regs; +#if defined(__x86_64__) + regs.mPC = reinterpret_cast
<Address>(state.REGISTER_FIELD(ip)); + regs.mSP = reinterpret_cast
<Address>(state.REGISTER_FIELD(sp)); + regs.mFP = reinterpret_cast
<Address>(state.REGISTER_FIELD(bp)); + regs.mLR = 0; +#elif defined(__aarch64__) + regs.mPC = reinterpret_cast
<Address>(state.REGISTER_FIELD(pc)); + regs.mSP = reinterpret_cast
<Address>(state.REGISTER_FIELD(sp)); + regs.mFP = reinterpret_cast
<Address>(state.REGISTER_FIELD(fp)); + regs.mLR = reinterpret_cast<Address>
(state.REGISTER_FIELD(lr)); +#else +# error "unknown architecture" +#endif + + aProcessRegs(regs, aNow); + } + +#undef REGISTER_FIELD + + //----------------------------------------------------------------// + // Resume the target thread. + + thread_resume(samplee_thread); + + // The profiler's critical section ends here. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static void* ThreadEntry(void* aArg) { + auto thread = static_cast(aArg); + thread->Run(); + return nullptr; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))), + mThread{nullptr} { + pthread_attr_t* attr_ptr = nullptr; + if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) { + MOZ_CRASH("pthread_create failed"); + } +} + +SamplerThread::~SamplerThread() { + pthread_join(mThread, nullptr); + // Just in the unlikely case some callbacks were added between the end of the + // thread and now. + InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), + SamplingState::JustStopped); +} + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + usleep(aMicroseconds); + // FIXME: the OSX 10.12 page for usleep says "The usleep() function is + // obsolescent. Use nanosleep(2) instead." This implementation could be + // merged with the linux-android version. Also, this doesn't handle the + // case where the usleep call is interrupted by a signal. 
+} + +void SamplerThread::Stop(PSLockRef aLock) { mSampler.Disable(aLock); } + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +static void PlatformInit(PSLockRef aLock) {} + +// clang-format off +#if defined(HAVE_NATIVE_UNWIND) +// Derive the stack pointer from the frame pointer. The 0x10 offset is +// 8 bytes for the previous frame pointer and 8 bytes for the return +// address both stored on the stack after at the beginning of the current +// frame. +# define REGISTERS_SYNC_POPULATE(regs) \ + regs.mSP = reinterpret_cast
<Address>(__builtin_frame_address(0)) + 0x10; \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wframe-address\"") \ + regs.mFP = reinterpret_cast
<Address>(__builtin_frame_address(1)); \ + _Pragma("GCC diagnostic pop") \ + regs.mPC = reinterpret_cast<Address>
( \ + __builtin_extract_return_addr(__builtin_return_address(0))); \ + regs.mLR = 0; +#endif +// clang-format on diff --git a/tools/profiler/core/platform-win32.cpp b/tools/profiler/core/platform-win32.cpp new file mode 100644 index 0000000000..5e10e04c89 --- /dev/null +++ b/tools/profiler/core/platform-win32.cpp @@ -0,0 +1,496 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#include +#include +#include + +#include "nsWindowsDllInterceptor.h" +#include "mozilla/StackWalk_windows.h" +#include "mozilla/WindowsVersion.h" + +#include + +static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) { +#if defined(GP_ARCH_amd64) + aRegs.mPC = reinterpret_cast
<Address>(aContext->Rip); + aRegs.mSP = reinterpret_cast
<Address>(aContext->Rsp); + aRegs.mFP = reinterpret_cast
<Address>(aContext->Rbp); + aRegs.mLR = 0; +#elif defined(GP_ARCH_x86) + aRegs.mPC = reinterpret_cast
<Address>(aContext->Eip); + aRegs.mSP = reinterpret_cast
<Address>(aContext->Esp); + aRegs.mFP = reinterpret_cast
<Address>(aContext->Ebp); + aRegs.mLR = 0; +#elif defined(GP_ARCH_arm64) + aRegs.mPC = reinterpret_cast
<Address>(aContext->Pc); + aRegs.mSP = reinterpret_cast
<Address>(aContext->Sp); + aRegs.mFP = reinterpret_cast
<Address>(aContext->Fp); + aRegs.mLR = reinterpret_cast<Address>
(aContext->Lr); +#else +# error "bad arch" +#endif +} + +// Gets a real (i.e. not pseudo) handle for the current thread, with the +// permissions needed for profiling. +// @return a real HANDLE for the current thread. +static HANDLE GetRealCurrentThreadHandleForProfiling() { + HANDLE realCurrentThreadHandle; + if (!::DuplicateHandle( + ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(), + &realCurrentThreadHandle, + THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, + FALSE, 0)) { + return nullptr; + } + + return realCurrentThreadHandle; +} + +static_assert( + std::is_same_v); + +mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) + : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) { + MOZ_ASSERT(aThreadId == ProfilerThreadId::FromNumber(::GetCurrentThreadId())); +} + +mozilla::profiler::PlatformData::~PlatformData() { + if (mProfiledThread) { + CloseHandle(mProfiledThread); + mProfiledThread = nullptr; + } +} + +static const HANDLE kNoThread = INVALID_HANDLE_VALUE; + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +Sampler::Sampler(PSLockRef aLock) {} + +void Sampler::Disable(PSLockRef aLock) {} + +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter) { + static const Span units = + (GetCycleTimeFrequencyMHz() != 0) ? MakeStringSpan("ns") + : MakeStringSpan("variable CPU cycles"); + aWriter.StringProperty("threadCPUDelta", units); +} + +/* static */ +uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) { + static const uint64_t cycleTimeFrequencyMHz = GetCycleTimeFrequencyMHz(); + if (cycleTimeFrequencyMHz == 0u) { + return aRawValue; + } + + constexpr uint64_t GHZ_PER_MHZ = 1'000u; + // To get ns, we need to divide cycles by a frequency in GHz, i.e.: + // cycles / (f_MHz / GHZ_PER_MHZ). To avoid losing the integer precision of + // f_MHz, this is computed as (cycles * GHZ_PER_MHZ) / f_MHz. 
+ // Adding GHZ_PER_MHZ/2 to (cycles * GHZ_PER_MHZ) will round to nearest when + // the result of the division is truncated. + return (aRawValue * GHZ_PER_MHZ + (GHZ_PER_MHZ / 2u)) / cycleTimeFrequencyMHz; +} + +static inline uint64_t ToNanoSeconds(const FILETIME& aFileTime) { + // FILETIME values are 100-nanoseconds units, converting + ULARGE_INTEGER usec = {{aFileTime.dwLowDateTime, aFileTime.dwHighDateTime}}; + return usec.QuadPart * 100; +} + +namespace mozilla::profiler { +bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) { + const HANDLE profiledThread = aPlatformData.ProfiledThread(); + int frequencyInMHz = GetCycleTimeFrequencyMHz(); + if (frequencyInMHz) { + uint64_t cpuCycleCount; + if (!QueryThreadCycleTime(profiledThread, &cpuCycleCount)) { + return false; + } + + constexpr uint64_t USEC_PER_NSEC = 1000L; + *aResult = cpuCycleCount * USEC_PER_NSEC / frequencyInMHz; + return true; + } + + FILETIME createTime, exitTime, kernelTime, userTime; + if (!GetThreadTimes(profiledThread, &createTime, &exitTime, &kernelTime, + &userTime)) { + return false; + } + + *aResult = ToNanoSeconds(kernelTime) + ToNanoSeconds(userTime); + return true; +} +} // namespace mozilla::profiler + +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) { + AUTO_PROFILER_STATS(GetProcessRunningTimes); + + static const HANDLE processHandle = GetCurrentProcess(); + + RunningTimes newRunningTimes; + { + AUTO_PROFILER_STATS(GetProcessRunningTimes_QueryProcessCycleTime); + if (ULONG64 cycles; QueryProcessCycleTime(processHandle, &cycles) != 0) { + newRunningTimes.SetThreadCPUDelta(cycles); + } + newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + }; + + const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated; + aPreviousRunningTimesToBeUpdated = newRunningTimes; + return diff; +} + +static RunningTimes GetThreadRunningTimesDiff( + 
PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(GetThreadRunningTimes); + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + const HANDLE profiledThread = platformData.ProfiledThread(); + + const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp( + [profiledThread](RunningTimes& aRunningTimes) { + AUTO_PROFILER_STATS(GetThreadRunningTimes_QueryThreadCycleTime); + if (ULONG64 cycles; + QueryThreadCycleTime(profiledThread, &cycles) != 0) { + aRunningTimes.ResetThreadCPUDelta(cycles); + } else { + aRunningTimes.ClearThreadCPUDelta(); + } + }); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + const RunningTimes diff = newRunningTimes - previousRunningTimes; + previousRunningTimes = newRunningTimes; + return diff; +} + +static void DiscardSuspendedThreadRunningTimes( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes); + + // On Windows, suspending a thread makes that thread work a little bit. So we + // want to discard any added running time since the call to + // GetThreadRunningTimesDiff, which is done by overwriting the thread's + // PreviousThreadRunningTimesRef() with the current running time now. 
+ + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + const HANDLE profiledThread = platformData.ProfiledThread(); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + + if (ULONG64 cycles; QueryThreadCycleTime(profiledThread, &cycles) != 0) { + previousRunningTimes.ResetThreadCPUDelta(cycles); + } else { + previousRunningTimes.ClearThreadCPUDelta(); + } +} + +template +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs) { + HANDLE profiled_thread = aThreadData.PlatformDataCRef().ProfiledThread(); + if (profiled_thread == nullptr) { + return; + } + + // Context used for sampling the register state of the profiled thread. + CONTEXT context; + memset(&context, 0, sizeof(context)); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + static const DWORD kSuspendFailed = static_cast(-1); + if (SuspendThread(profiled_thread) == kSuspendFailed) { + return; + } + + // SuspendThread is asynchronous, so the thread may still be running. + // Call GetThreadContext first to ensure the thread is really suspended. + // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743. + + // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in + // RtlVirtualUnwind (see bug 1120126) so we set all the flags. +#if defined(GP_ARCH_amd64) + context.ContextFlags = CONTEXT_FULL; +#else + context.ContextFlags = CONTEXT_CONTROL; +#endif + if (!GetThreadContext(profiled_thread, &context)) { + ResumeThread(profiled_thread); + return; + } + + //----------------------------------------------------------------// + // Sample the target thread. 
+ + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. We must be very careful + // what we do here, or risk deadlock. See the corresponding comment in + // platform-linux-android.cpp for details. + + Registers regs; + PopulateRegsFromContext(regs, &context); + aProcessRegs(regs, aNow); + + //----------------------------------------------------------------// + // Resume the target thread. + + ResumeThread(profiled_thread); + + // The profiler's critical section ends here. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static unsigned int __stdcall ThreadEntry(void* aArg) { + auto thread = static_cast(aArg); + thread->Run(); + return 0; +} + +static unsigned int __stdcall UnregisteredThreadSpyEntry(void* aArg) { + auto thread = static_cast(aArg); + thread->RunUnregisteredThreadSpy(); + return 0; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))), + mNoTimerResolutionChange( + ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) { + if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) { + // By default the timer resolution (which tends to be 1/64Hz, around 16ms) + // is not changed. However, if the requested interval is sufficiently low, + // the resolution will be adjusted to match. Note that this affects all + // timers in Firefox, and could therefore hide issues while profiling. This + // change may be prevented with the "notimerresolutionchange" feature. 
+ ::timeBeginPeriod(mIntervalMicroseconds / 1000); + } + + if (ProfilerFeature::HasUnregisteredThreads(aFeatures)) { + // Sampler&spy threads are not running yet, so it's safe to modify + // mSpyingState without locking the monitor. + mSpyingState = SpyingState::Spy_Initializing; + mUnregisteredThreadSpyThread = reinterpret_cast( + _beginthreadex(nullptr, + /* stack_size */ 0, UnregisteredThreadSpyEntry, this, + /* initflag */ 0, nullptr)); + if (mUnregisteredThreadSpyThread == 0) { + MOZ_CRASH("_beginthreadex failed"); + } + } + + // Create a new thread. It is important to use _beginthreadex() instead of + // the Win32 function CreateThread(), because the CreateThread() does not + // initialize thread-specific structures in the C runtime library. + mThread = reinterpret_cast(_beginthreadex(nullptr, + /* stack_size */ 0, + ThreadEntry, this, + /* initflag */ 0, nullptr)); + if (mThread == 0) { + MOZ_CRASH("_beginthreadex failed"); + } +} + +SamplerThread::~SamplerThread() { + if (mUnregisteredThreadSpyThread) { + { + // Make sure the spying thread is not actively working, because the win32 + // function it's using could deadlock with WaitForSingleObject below. + MonitorAutoLock spyingStateLock{mSpyingStateMonitor}; + while (mSpyingState != SpyingState::Spy_Waiting && + mSpyingState != SpyingState::SamplerToSpy_Start) { + spyingStateLock.Wait(); + } + + mSpyingState = SpyingState::MainToSpy_Shutdown; + spyingStateLock.NotifyAll(); + + do { + spyingStateLock.Wait(); + } while (mSpyingState != SpyingState::SpyToMain_ShuttingDown); + } + + WaitForSingleObject(mUnregisteredThreadSpyThread, INFINITE); + + // Close our own handle for the thread. + if (mUnregisteredThreadSpyThread != kNoThread) { + CloseHandle(mUnregisteredThreadSpyThread); + } + } + + WaitForSingleObject(mThread, INFINITE); + + // Close our own handle for the thread. 
+ if (mThread != kNoThread) { + CloseHandle(mThread); + } + + // Just in the unlikely case some callbacks were added between the end of the + // thread and now. + InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), + SamplingState::JustStopped); +} + +void SamplerThread::RunUnregisteredThreadSpy() { + // TODO: Consider registering this thread. + // Pros: Remove from list of unregistered threads; Not useful to profiling + // Firefox itself. + // Cons: Doesn't appear in the profile, so users may miss the expensive CPU + // cost of this work on Windows. + PR_SetCurrentThreadName("UnregisteredThreadSpy"); + + while (true) { + { + MonitorAutoLock spyingStateLock{mSpyingStateMonitor}; + // Either this is the first loop, or we're looping after working. + MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing || + mSpyingState == SpyingState::Spy_Working); + + // Let everyone know we're waiting, and then wait. + mSpyingState = SpyingState::Spy_Waiting; + mSpyingStateMonitor.NotifyAll(); + do { + spyingStateLock.Wait(); + } while (mSpyingState == SpyingState::Spy_Waiting); + + if (mSpyingState == SpyingState::MainToSpy_Shutdown) { + mSpyingState = SpyingState::SpyToMain_ShuttingDown; + mSpyingStateMonitor.NotifyAll(); + break; + } + + MOZ_ASSERT(mSpyingState == SpyingState::SamplerToSpy_Start); + mSpyingState = SpyingState::Spy_Working; + } + + // Do the work without lock, so other threads can read the current state. + SpyOnUnregisteredThreads(); + } +} + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + // For now, keep the old behaviour of minimum Sleep(1), even for + // smaller-than-usual sleeps after an overshoot, unless the user has + // explicitly opted into a sub-millisecond profiler interval. 
+ if (mIntervalMicroseconds >= 1000) { + ::Sleep(std::max(1u, aMicroseconds / 1000)); + } else { + TimeStamp start = TimeStamp::Now(); + TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds); + + // First, sleep for as many whole milliseconds as possible. + if (aMicroseconds >= 1000) { + ::Sleep(aMicroseconds / 1000); + } + + // Then, spin until enough time has passed. + while (TimeStamp::Now() < end) { + YieldProcessor(); + } + } +} + +void SamplerThread::Stop(PSLockRef aLock) { + if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) { + // Disable any timer resolution changes we've made. Do it now while + // gPSMutex is locked, i.e. before any other SamplerThread can be created + // and call ::timeBeginPeriod(). + // + // It's safe to do this now even though this SamplerThread is still alive, + // because the next time the main loop of Run() iterates it won't get past + // the mActivityGeneration check, and so it won't make any more ::Sleep() + // calls. + ::timeEndPeriod(mIntervalMicroseconds / 1000); + } + + mSampler.Disable(aLock); +} + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +static void PlatformInit(PSLockRef aLock) {} + +#if defined(HAVE_NATIVE_UNWIND) +# define REGISTERS_SYNC_POPULATE(regs) \ + CONTEXT context; \ + RtlCaptureContext(&context); \ + PopulateRegsFromContext(regs, &context); +#endif + +#if defined(GP_PLAT_amd64_windows) + +// Use InitializeWin64ProfilerHooks from the base profiler. 
+ +namespace mozilla { +namespace baseprofiler { +MFBT_API void InitializeWin64ProfilerHooks(); +} // namespace baseprofiler +} // namespace mozilla + +using mozilla::baseprofiler::InitializeWin64ProfilerHooks; + +#endif // defined(GP_PLAT_amd64_windows) diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp new file mode 100644 index 0000000000..8950c48b58 --- /dev/null +++ b/tools/profiler/core/platform.cpp @@ -0,0 +1,7067 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// There are three kinds of samples done by the profiler. +// +// - A "periodic" sample is the most complex kind. It is done in response to a +// timer while the profiler is active. It involves writing a stack trace plus +// a variety of other values (memory measurements, responsiveness +// measurements, markers, etc.) into the main ProfileBuffer. The sampling is +// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to +// get the register values. +// +// - A "synchronous" sample is a simpler kind. It is done in response to an API +// call (profiler_get_backtrace()). It involves writing a stack trace and +// little else into a temporary ProfileBuffer, and wrapping that up in a +// ProfilerBacktrace that can be subsequently used in a marker. The sampling +// is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the +// register values. +// +// - A "backtrace" sample is the simplest kind. It is done in response to an +// API call (profiler_suspend_and_sample_thread()). It involves getting a +// stack trace via a ProfilerStackCollector; it does not write to a +// ProfileBuffer. 
The sampling is done from off-thread, and so uses +// SuspendAndSampleAndResumeThread() to get the register values. + +#include "platform.h" + +#include "GeckoProfiler.h" +#include "GeckoProfilerReporter.h" +#include "PageInformation.h" +#include "PowerCounters.h" +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" +#include "ProfilerBacktrace.h" +#include "ProfilerChild.h" +#include "ProfilerCodeAddressService.h" +#include "ProfilerControl.h" +#include "ProfilerIOInterposeObserver.h" +#include "ProfilerParent.h" +#include "ProfilerRustBindings.h" +#include "mozilla/MozPromise.h" +#include "shared-libraries.h" +#include "VTuneProfiler.h" + +#include "js/ProfilingFrameIterator.h" +#include "memory_hooks.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/AutoProfilerLabel.h" +#include "mozilla/BaseAndGeckoProfilerDetail.h" +#include "mozilla/ExtensionPolicyService.h" +#include "mozilla/extensions/WebExtensionPolicy.h" +#include "mozilla/glean/GleanMetrics.h" +#include "mozilla/Monitor.h" +#include "mozilla/Preferences.h" +#include "mozilla/Printf.h" +#include "mozilla/ProcInfo.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h" +#include "mozilla/ProfileChunkedBuffer.h" +#include "mozilla/SchedulerGroup.h" +#include "mozilla/Services.h" +#include "mozilla/StackWalk.h" +#ifdef XP_WIN +# include "mozilla/StackWalkThread.h" +#endif +#include "mozilla/StaticPtr.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "BaseProfiler.h" +#include "nsDirectoryServiceDefs.h" +#include "nsDirectoryServiceUtils.h" +#include "nsIDocShell.h" +#include "nsIHttpProtocolHandler.h" +#include "nsIObserverService.h" +#include "nsIPropertyBag2.h" +#include "nsIXULAppInfo.h" +#include "nsIXULRuntime.h" +#include "nsJSPrincipals.h" +#include "nsMemoryReporterManager.h" +#include "nsPIDOMWindow.h" +#include 
"nsProfilerStartParams.h" +#include "nsScriptSecurityManager.h" +#include "nsSystemInfo.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "Tracing.h" +#include "prdtoa.h" +#include "prtime.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(GP_OS_android) +# include "JavaExceptions.h" +# include "mozilla/java/GeckoJavaSamplerNatives.h" +# include "mozilla/jni/Refs.h" +#endif + +#if defined(GP_OS_darwin) +# include "nsCocoaFeatures.h" +#endif + +#if defined(GP_PLAT_amd64_darwin) +# include +#endif + +#if defined(GP_OS_windows) +# include + +// GetThreadInformation is not available on Windows 7. +WINBASEAPI +BOOL WINAPI GetThreadInformation( + _In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass, + _Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation, + _In_ DWORD ThreadInformationSize); + +#endif + +// Win32 builds always have frame pointers, so FramePointerStackWalk() always +// works. +#if defined(GP_PLAT_x86_windows) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// Win64 builds always omit frame pointers, so we use the slower +// MozStackWalk(), which works in that case. +#if defined(GP_PLAT_amd64_windows) +# define HAVE_NATIVE_UNWIND +# define USE_MOZ_STACK_WALK +#endif + +// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower +// MozStackWalk(). +#if defined(GP_PLAT_arm64_windows) +# define HAVE_NATIVE_UNWIND +# define USE_MOZ_STACK_WALK +#endif + +// Mac builds use FramePointerStackWalk(). Even if we build without +// frame pointers, we'll still get useful stacks in system libraries +// because those always have frame pointers. +// We don't use MozStackWalk() on Mac. +#if defined(GP_OS_darwin) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// Android builds use the ARM Exception Handling ABI to unwind. 
+#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) +# define HAVE_NATIVE_UNWIND +# define USE_EHABI_STACKWALK +# include "EHABIStackWalk.h" +#endif + +// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks. +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \ + defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \ + defined(GP_PLAT_arm64_freebsd) +# define HAVE_NATIVE_UNWIND +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" + +// On linux we use LUL for periodic samples and synchronous samples, but we use +// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled. +// (See the comment at the top of the file for a definition of +// periodic/synchronous/backtrace.). +// +// FramePointerStackWalk can produce incomplete stacks when the current entry is +// in a shared library without framepointers, however LUL can take a long time +// to initialize, which is undesirable for consumers of +// profiler_suspend_and_sample_thread like the Background Hang Reporter. +# if defined(MOZ_PROFILING) +# define USE_FRAME_POINTER_STACK_WALK +# endif +#endif + +// We can only stackwalk without expensive initialization on platforms which +// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires +// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of +// which can be expensive. 
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +# define HAVE_FASTINIT_NATIVE_UNWIND +#endif + +#ifdef MOZ_VALGRIND +# include +#else +# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0) +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include +#endif + +using namespace mozilla; +using namespace mozilla::literals::ProportionValue_literals; + +using mozilla::profiler::detail::RacyFeatures; +using ThreadRegistration = mozilla::profiler::ThreadRegistration; +using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo; +using ThreadRegistry = mozilla::profiler::ThreadRegistry; + +LazyLogModule gProfilerLog("prof"); + +ProfileChunkedBuffer& profiler_get_core_buffer() { + // Defer to the Base Profiler in mozglue to create the core buffer if needed, + // and keep a reference here, for quick access in xul. + static ProfileChunkedBuffer& sProfileChunkedBuffer = + baseprofiler::profiler_get_core_buffer(); + return sProfileChunkedBuffer; +} + +mozilla::Atomic gSkipSampling; + +#if defined(GP_OS_android) +class GeckoJavaSampler + : public java::GeckoJavaSampler::Natives { + private: + GeckoJavaSampler(); + + public: + static double GetProfilerTime() { + if (!profiler_is_active()) { + return 0.0; + } + return profiler_time(); + }; + + static void JavaStringArrayToCharArray(jni::ObjectArray::Param& aJavaArray, + Vector& aCharArray, + JNIEnv* aJni) { + int arraySize = aJavaArray->Length(); + for (int i = 0; i < arraySize; i++) { + jstring javaString = + (jstring)(aJni->GetObjectArrayElement(aJavaArray.Get(), i)); + const char* filterString = aJni->GetStringUTFChars(javaString, 0); + // FIXME. These strings are leaked. 
+ MOZ_RELEASE_ASSERT(aCharArray.append(filterString)); + } + } + + static void StartProfiler(jni::ObjectArray::Param aFiltersArray, + jni::ObjectArray::Param aFeaturesArray) { + JNIEnv* jni = jni::GetEnvForThread(); + Vector filtersTemp; + Vector featureStringArray; + + JavaStringArrayToCharArray(aFiltersArray, filtersTemp, jni); + JavaStringArrayToCharArray(aFeaturesArray, featureStringArray, jni); + + uint32_t features = 0; + features = ParseFeaturesFromStringArray(featureStringArray.begin(), + featureStringArray.length()); + + // 128 * 1024 * 1024 is the entries preset that is given in + // devtools/client/performance-new/shared/background.jsm.js + profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features, + filtersTemp.begin(), filtersTemp.length(), 0, Nothing()); + } + + static void StopProfiler(jni::Object::Param aGeckoResult) { + auto result = java::GeckoResult::LocalRef(aGeckoResult); + profiler_pause(); + nsCOMPtr nsProfiler( + do_GetService("@mozilla.org/tools/profiler;1")); + nsProfiler->GetProfileDataAsGzippedArrayBufferAndroid(0)->Then( + GetMainThreadSerialEventTarget(), __func__, + [result](FallibleTArray compressedProfile) { + result->Complete(jni::ByteArray::New( + reinterpret_cast(compressedProfile.Elements()), + compressedProfile.Length())); + + // Done with capturing a profile. Stop the profiler. + profiler_stop(); + }, + [result](nsresult aRv) { + char errorString[9]; + sprintf(errorString, "%08x", aRv); + result->CompleteExceptionally( + mozilla::java::sdk::IllegalStateException::New(errorString) + .Cast()); + + // Failed to capture a profile. Stop the profiler. + profiler_stop(); + }); + } +}; +#endif + +constexpr static bool ValidateFeatures() { + int expectedFeatureNumber = 0; + + // Feature numbers should start at 0 and increase by 1 each. 
+#define CHECK_FEATURE(n_, str_, Name_, desc_) \ + if ((n_) != expectedFeatureNumber) { \ + return false; \ + } \ + ++expectedFeatureNumber; + + PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE) + +#undef CHECK_FEATURE + + return true; +} + +static_assert(ValidateFeatures(), "Feature list is invalid"); + +// Return all features that are available on this platform. +static uint32_t AvailableFeatures() { + uint32_t features = 0; + +#define ADD_FEATURE(n_, str_, Name_, desc_) \ + ProfilerFeature::Set##Name_(features); + + // Add all the possible features. + PROFILER_FOR_EACH_FEATURE(ADD_FEATURE) + +#undef ADD_FEATURE + + // Now remove features not supported on this platform/configuration. +#if !defined(GP_OS_android) + ProfilerFeature::ClearJava(features); +#endif +#if !defined(HAVE_NATIVE_UNWIND) + ProfilerFeature::ClearStackWalk(features); +#endif +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (getenv("XPCOM_MEM_BLOAT_LOG")) { + NS_WARNING("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations."); + // The memory hooks are available, but the bloat log is enabled, which is + // not compatible with the native allocations tracking. See the comment in + // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for + // more information. + ProfilerFeature::ClearNativeAllocations(features); + } +#else + // The memory hooks are not available. + ProfilerFeature::ClearNativeAllocations(features); +#endif + +#if !defined(GP_OS_windows) + ProfilerFeature::ClearNoTimerResolutionChange(features); +#endif + + return features; +} + +// Default features common to all contexts (even if not available). +static constexpr uint32_t DefaultFeatures() { + return ProfilerFeature::Java | ProfilerFeature::JS | + ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization | + ProfilerFeature::Screenshots | ProfilerFeature::ProcessCPU; +} + +// Extra default features when MOZ_PROFILER_STARTUP is set (even if not +// available). 
+static constexpr uint32_t StartupExtraDefaultFeatures() { + // Enable file I/Os by default for startup profiles as startup is heavy on + // I/O operations. + return ProfilerFeature::FileIOAll | ProfilerFeature::IPCMessages; +} + +Json::String ToCompactString(const Json::Value& aJsonValue) { + Json::StreamWriterBuilder builder; + // No indentations, and no newlines. + builder["indentation"] = ""; + // This removes spaces after colons. + builder["enableYAMLCompatibility"] = false; + // Only 6 digits after the decimal point; timestamps in ms have ns precision. + builder["precision"] = 6; + builder["precisionType"] = "decimal"; + + return Json::writeString(builder, aJsonValue); +} + +/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex + ProfilingLog::gMutex; +/* static */ mozilla::UniquePtr ProfilingLog::gLog; + +/* static */ void ProfilingLog::Init() { + mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex}; + MOZ_ASSERT(!gLog); + gLog = mozilla::MakeUniqueFallible(Json::objectValue); + if (gLog) { + (*gLog)[Json::StaticString{"profilingLogBegin" TIMESTAMP_JSON_SUFFIX}] = + ProfilingLog::Timestamp(); + } +} + +/* static */ void ProfilingLog::Destroy() { + mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex}; + MOZ_ASSERT(gLog); + gLog = nullptr; +} + +/* static */ bool ProfilingLog::IsLockedOnCurrentThread() { + return gMutex.IsLockedOnCurrentThread(); +} + +// RAII class to lock the profiler mutex. +// It provides a mechanism to determine if it is locked or not in order for +// memory hooks to avoid re-entering the profiler locked state. +// Locking order: Profiler, ThreadRegistry, ThreadRegistration. 
+class MOZ_RAII PSAutoLock { + public: + PSAutoLock() + : mLock([]() -> mozilla::baseprofiler::detail::BaseProfilerMutex& { + // In DEBUG builds, *before* we attempt to lock gPSMutex, we want to + // check that the ThreadRegistry, ThreadRegistration, and ProfilingLog + // mutexes are *not* locked on this thread, to avoid inversion + // deadlocks. + MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread()); + MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread()); + MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread()); + return gPSMutex; + }()) {} + + PSAutoLock(const PSAutoLock&) = delete; + void operator=(const PSAutoLock&) = delete; + + static bool IsLockedOnCurrentThread() { + return gPSMutex.IsLockedOnCurrentThread(); + } + + private: + static mozilla::baseprofiler::detail::BaseProfilerMutex gPSMutex; + mozilla::baseprofiler::detail::BaseProfilerAutoLock mLock; +}; + +/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex + PSAutoLock::gPSMutex{"Gecko Profiler mutex"}; + +// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's +// fields. +typedef const PSAutoLock& PSLockRef; + +#define PS_GET(type_, name_) \ + static type_ name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_LOCKLESS(type_, name_) \ + static type_ name_() { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_AND_SET(type_, name_) \ + PS_GET(type_, name_) \ + static void Set##name_(PSLockRef, type_ a##name_) { \ + MOZ_ASSERT(sInstance); \ + sInstance->m##name_ = a##name_; \ + } + +static constexpr size_t MAX_JS_FRAMES = + mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES; +using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame; +using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer; + +// All functions in this file can run on multiple threads unless they have an +// NS_IsMainThread() assertion. 
+ +// This class contains the profiler's core global state, i.e. that which is +// valid even when the profiler is not active. Most profile operations can't do +// anything useful when this class is not instantiated, so we release-assert +// its non-nullness in all such operations. +// +// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a +// PSAutoLock reference as an argument as proof that the gPSMutex is currently +// locked. This makes it clear when gPSMutex is locked and helps avoid +// accidental unlocked accesses to global state. There are ways to circumvent +// this mechanism, but please don't do so without *very* good reason and a +// detailed explanation. +// +// The exceptions to this rule: +// +// - mProcessStartTime, because it's immutable; +class CorePS { + private: + CorePS() + : mProcessStartTime(TimeStamp::ProcessCreation()) +#ifdef USE_LUL_STACKWALK + , + mLul(nullptr) +#endif + { + MOZ_ASSERT(NS_IsMainThread(), + "CorePS must be created from the main thread"); + } + + ~CorePS() { +#ifdef USE_LUL_STACKWALK + delete sInstance->mLul; +#endif + } + + public: + static void Create(PSLockRef aLock) { + MOZ_ASSERT(!sInstance); + sInstance = new CorePS(); + } + + static void Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + delete sInstance; + sInstance = nullptr; + } + + // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex + // being locked. This is because CorePS is instantiated so early on the main + // thread that we don't have to worry about it being racy. 
+ static bool Exists() { return !!sInstance; } + + static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf, + size_t& aProfSize, size_t& aLulSize) { + MOZ_ASSERT(sInstance); + + aProfSize += aMallocSizeOf(sInstance); + + aProfSize += ThreadRegistry::SizeOfIncludingThis(aMallocSizeOf); + + for (auto& registeredPage : sInstance->mRegisteredPages) { + aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf); + } + + // Measurement of the following things may be added later if DMD finds it + // is worthwhile: + // - CorePS::mRegisteredPages itself (its elements' children are + // measured above) + +#if defined(USE_LUL_STACKWALK) + if (lul::LUL* lulPtr = sInstance->mLul; lulPtr) { + aLulSize += lulPtr->SizeOfIncludingThis(aMallocSizeOf); + } +#endif + } + + // No PSLockRef is needed for this field because it's immutable. + PS_GET_LOCKLESS(TimeStamp, ProcessStartTime) + + PS_GET(JsFrameBuffer&, JsFrames) + + PS_GET(Vector>&, RegisteredPages) + + static void AppendRegisteredPage(PSLockRef, + RefPtr&& aRegisteredPage) { + MOZ_ASSERT(sInstance); + struct RegisteredPageComparator { + PageInformation* aA; + bool operator()(PageInformation* aB) const { return aA->Equals(aB); } + }; + + auto foundPageIter = std::find_if( + sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(), + RegisteredPageComparator{aRegisteredPage.get()}); + + if (foundPageIter != sInstance->mRegisteredPages.end()) { + if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) { + // When a BrowsingContext is loaded, the first url loaded in it will be + // about:blank, and if the principal matches, the first document loaded + // in it will share an inner window. That's why we should delete the + // intermittent about:blank if they share the inner window. + sInstance->mRegisteredPages.erase(foundPageIter); + } else { + // Do not register the same page again. 
+ return; + } + } + + MOZ_RELEASE_ASSERT( + sInstance->mRegisteredPages.append(std::move(aRegisteredPage))); + } + + static void RemoveRegisteredPage(PSLockRef, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + // Remove RegisteredPage from mRegisteredPages by given inner window ID. + sInstance->mRegisteredPages.eraseIf([&](const RefPtr& rd) { + return rd->InnerWindowID() == aRegisteredInnerWindowID; + }); + } + + static void ClearRegisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mRegisteredPages.clear(); + } + + PS_GET(const Vector&, Counters) + + static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) { + MOZ_ASSERT(sInstance); + // we don't own the counter; they may be stored in static objects + MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter)); + } + + static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { + // we may be called to remove a counter after the profiler is stopped or + // late in shutdown. + if (sInstance) { + auto* counter = std::find(sInstance->mCounters.begin(), + sInstance->mCounters.end(), aCounter); + MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end()); + sInstance->mCounters.erase(counter); + } + } + +#ifdef USE_LUL_STACKWALK + static lul::LUL* Lul() { + MOZ_RELEASE_ASSERT(sInstance); + return sInstance->mLul; + } + static void SetLul(UniquePtr aLul) { + MOZ_RELEASE_ASSERT(sInstance); + MOZ_RELEASE_ASSERT( + sInstance->mLul.compareExchange(nullptr, aLul.release())); + } +#endif + + PS_GET_AND_SET(const nsACString&, ProcessName) + PS_GET_AND_SET(const nsACString&, ETLDplus1) + + private: + // The singleton instance + static CorePS* sInstance; + + // The time that the process started. + const TimeStamp mProcessStartTime; + + // Info on all the registered pages. + // InnerWindowIDs in mRegisteredPages are unique. + Vector> mRegisteredPages; + + // Non-owning pointers to all active counters + Vector mCounters; + +#ifdef USE_LUL_STACKWALK + // LUL's state. 
Null prior to the first activation, non-null thereafter. + // Owned by this CorePS. + mozilla::Atomic mLul; +#endif + + // Process name, provided by child process initialization code. + nsAutoCString mProcessName; + // Private name, provided by child process initialization code (eTLD+1 in + // fission) + nsAutoCString mETLDplus1; + + // This memory buffer is used by the MergeStacks mechanism. Previously it was + // stack allocated, but this led to a stack overflow, as it was too much + // memory. Here the buffer can be pre-allocated, and shared with the + // MergeStacks feature as needed. MergeStacks is only run while holding the + // lock, so it is safe to have only one instance allocated for all of the + // threads. + JsFrameBuffer mJsFrames; +}; + +CorePS* CorePS::sInstance = nullptr; + +void locked_profiler_add_sampled_counter(PSLockRef aLock, + BaseProfilerCount* aCounter) { + CorePS::AppendCounter(aLock, aCounter); +} + +void locked_profiler_remove_sampled_counter(PSLockRef aLock, + BaseProfilerCount* aCounter) { + // Note: we don't enforce a final sample, though we could do so if the + // profiler was active + CorePS::RemoveCounter(aLock, aCounter); +} + +class SamplerThread; + +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval, uint32_t aFeatures); + +struct LiveProfiledThreadData { + UniquePtr mProfiledThreadData; +}; + +// The buffer size is provided as a number of "entries", this is their size in +// bytes. +constexpr static uint32_t scBytesPerEntry = 8; + +// This class contains the profiler's global state that is valid only when the +// profiler is active. When not instantiated, the profiler is inactive. +// +// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as +// CorePS. 
+// +class ActivePS { + private: + // We need to decide how many chunks of what size we want to fit in the given + // total maximum capacity for this process, in the (likely) context of + // multiple processes doing the same choice and having an inter-process + // mechanism to control the overal memory limit. + + // Minimum chunk size allowed, enough for at least one stack. + constexpr static uint32_t scMinimumChunkSize = + 2 * ProfileBufferChunkManager::scExpectedMaximumStackSize; + + // Ideally we want at least 2 unreleased chunks to work with (1 current and 1 + // next), and 2 released chunks (so that one can be recycled when old, leaving + // one with some data). + constexpr static uint32_t scMinimumNumberOfChunks = 4; + + // And we want to limit chunks to a maximum size, which is a compromise + // between: + // - A big size, which helps with reducing the rate of allocations and IPCs. + // - A small size, which helps with equalizing the duration of recorded data + // (as the inter-process controller will discard the oldest chunks in all + // Firefox processes). + constexpr static uint32_t scMaximumChunkSize = 1024 * 1024; + + public: + // We should be able to store at least the minimum number of the smallest- + // possible chunks. + constexpr static uint32_t scMinimumBufferSize = + scMinimumNumberOfChunks * scMinimumChunkSize; + // Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler: + // https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java + constexpr static uint32_t scMinimumBufferEntries = + scMinimumBufferSize / scBytesPerEntry; + + // Limit to 2GiB. 
+ constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u; + constexpr static uint32_t scMaximumBufferEntries = + scMaximumBufferSize / scBytesPerEntry; + + constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) { + if (aEntries <= scMinimumBufferEntries) { + return scMinimumBufferEntries; + } + if (aEntries >= scMaximumBufferEntries) { + return scMaximumBufferEntries; + } + return aEntries; + } + + private: + constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { + return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) * + scBytesPerEntry / scMinimumNumberOfChunks, + size_t(scMaximumChunkSize))); + } + + static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { + // Filter out any features unavailable in this platform/configuration. + aFeatures &= AvailableFeatures(); + + // Some features imply others. + if (aFeatures & ProfilerFeature::FileIOAll) { + aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO; + } else if (aFeatures & ProfilerFeature::FileIO) { + aFeatures |= ProfilerFeature::MainThreadIO; + } + + if (aFeatures & ProfilerFeature::CPUAllThreads) { + aFeatures |= ProfilerFeature::CPUUtilization; + } + + return aFeatures; + } + + bool ShouldInterposeIOs() { + return ProfilerFeature::HasMainThreadIO(mFeatures) || + ProfilerFeature::HasFileIO(mFeatures) || + ProfilerFeature::HasFileIOAll(mFeatures); + } + + ActivePS( + PSLockRef aLock, const TimeStamp& aProfilingStartTime, + PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, + const Maybe& aDuration, + UniquePtr aChunkManagerOrNull) + : mProfilingStartTime(aProfilingStartTime), + mGeneration(sNextGeneration++), + mCapacity(aCapacity), + mDuration(aDuration), + mInterval(aInterval), + mFeatures(AdjustFeatures(aFeatures, aFilterCount)), + mActiveTabID(aActiveTabID), + mProfileBufferChunkManager( + aChunkManagerOrNull + ? 
std::move(aChunkManagerOrNull) + : MakeUnique( + size_t(ClampToAllowedEntries(aCapacity.Value())) * + scBytesPerEntry, + ChunkSizeForEntries(aCapacity.Value()))), + mProfileBuffer([this]() -> ProfileChunkedBuffer& { + ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer(); + coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager); + return coreBuffer; + }()), + mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures) + ? new ProcessCPUCounter(aLock) + : nullptr), + mMaybePowerCounters(nullptr), + // The new sampler thread doesn't start sampling immediately because the + // main loop within Run() is blocked until this function's caller + // unlocks gPSMutex. + mSamplerThread( + NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)), + mIsPaused(false), + mIsSamplingPaused(false) { + ProfilingLog::Init(); + + // Deep copy and lower-case aFilters. + MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount)); + MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount)); + for (uint32_t i = 0; i < aFilterCount; ++i) { + mFilters[i] = aFilters[i]; + mFiltersLowered[i].reserve(mFilters[i].size()); + std::transform(mFilters[i].cbegin(), mFilters[i].cend(), + std::back_inserter(mFiltersLowered[i]), ::tolower); + } + +#if !defined(RELEASE_OR_BETA) + if (ShouldInterposeIOs()) { + // We need to register the observer on the main thread, because we want + // to observe IO that happens on the main thread. + // IOInterposer needs to be initialized before calling + // IOInterposer::Register or our observer will be silently dropped. + if (NS_IsMainThread()) { + IOInterposer::Init(); + IOInterposer::Register(IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("ActivePS::ActivePS", []() { + // Note: This could theoretically happen after ActivePS gets + // destroyed, but it's ok: + // - The Observer always checks that the profiler is (still) + // active before doing its work. 
+ // - The destruction should happen on the same thread as this + // construction, so the un-registration will also be dispatched + // and queued on the main thread, and run after this. + IOInterposer::Init(); + IOInterposer::Register( + IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + })); + } + } +#endif + + if (ProfilerFeature::HasPower(aFeatures)) { + mMaybePowerCounters = new PowerCounters(); + for (const auto& powerCounter : mMaybePowerCounters->GetCounters()) { + locked_profiler_add_sampled_counter(aLock, powerCounter); + } + } + } + + ~ActivePS() { + MOZ_ASSERT( + !mMaybeProcessCPUCounter, + "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()"); + MOZ_ASSERT( + !mMaybePowerCounters, + "mMaybePowerCounters should have been deleted before ~ActivePS()"); + +#if !defined(RELEASE_OR_BETA) + if (ShouldInterposeIOs()) { + // We need to unregister the observer on the main thread, because that's + // where we've registered it. + if (NS_IsMainThread()) { + IOInterposer::Unregister(IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("ActivePS::~ActivePS", []() { + IOInterposer::Unregister( + IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + })); + } + } +#endif + if (mProfileBufferChunkManager) { + // We still control the chunk manager, remove it from the core buffer. 
+ profiler_get_core_buffer().ResetChunkManager(); + } + + ProfilingLog::Destroy(); + } + + bool ThreadSelected(const char* aThreadName) { + if (mFiltersLowered.empty()) { + return true; + } + + std::string name = aThreadName; + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + for (const auto& filter : mFiltersLowered) { + if (filter == "*") { + return true; + } + + // Crude, non UTF-8 compatible, case insensitive substring search + if (name.find(filter) != std::string::npos) { + return true; + } + + // If the filter is "pid:", profile all threads. + if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) { + return true; + } + } + + return false; + } + + public: + static void Create( + PSLockRef aLock, const TimeStamp& aProfilingStartTime, + PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, + const Maybe& aDuration, + UniquePtr aChunkManagerOrNull) { + MOZ_ASSERT(!sInstance); + sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval, + aFeatures, aFilters, aFilterCount, aActiveTabID, + aDuration, std::move(aChunkManagerOrNull)); + } + + [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + if (sInstance->mMaybeProcessCPUCounter) { + locked_profiler_remove_sampled_counter( + aLock, sInstance->mMaybeProcessCPUCounter); + delete sInstance->mMaybeProcessCPUCounter; + sInstance->mMaybeProcessCPUCounter = nullptr; + } + + if (sInstance->mMaybePowerCounters) { + for (const auto& powerCounter : + sInstance->mMaybePowerCounters->GetCounters()) { + locked_profiler_remove_sampled_counter(aLock, powerCounter); + } + delete sInstance->mMaybePowerCounters; + sInstance->mMaybePowerCounters = nullptr; + } + + auto samplerThread = sInstance->mSamplerThread; + delete sInstance; + sInstance = nullptr; + + return samplerThread; + } + + static bool Exists(PSLockRef) { return !!sInstance; } + + static bool Equals(PSLockRef, 
PowerOfTwo32 aCapacity, + const Maybe& aDuration, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, uint64_t aActiveTabID) { + MOZ_ASSERT(sInstance); + if (sInstance->mCapacity != aCapacity || + sInstance->mDuration != aDuration || + sInstance->mInterval != aInterval || + sInstance->mFeatures != aFeatures || + sInstance->mFilters.length() != aFilterCount || + sInstance->mActiveTabID != aActiveTabID) { + return false; + } + + for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) { + if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) { + return false; + } + } + return true; + } + + static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(sInstance); + + size_t n = aMallocSizeOf(sInstance); + + n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mLiveProfiledThreads (both the array itself, and the contents) + // - mDeadProfiledThreads (both the array itself, and the contents) + // + + return n; + } + + static ThreadProfilingFeatures ProfilingFeaturesForThread( + PSLockRef aLock, const ThreadRegistrationInfo& aInfo) { + MOZ_ASSERT(sInstance); + if (sInstance->ThreadSelected(aInfo.Name())) { + // This thread was selected by the user, record everything. 
+ return ThreadProfilingFeatures::Any; + } + ThreadProfilingFeatures features = ThreadProfilingFeatures::NotProfiled; + if (ActivePS::FeatureCPUAllThreads(aLock)) { + features = Combine(features, ThreadProfilingFeatures::CPUUtilization); + } + if (ActivePS::FeatureSamplingAllThreads(aLock)) { + features = Combine(features, ThreadProfilingFeatures::Sampling); + } + if (ActivePS::FeatureMarkersAllThreads(aLock)) { + features = Combine(features, ThreadProfilingFeatures::Markers); + } + return features; + } + + [[nodiscard]] static bool AppendPostSamplingCallback( + PSLockRef, PostSamplingCallback&& aCallback); + + // Writes out the current active configuration of the profile. + static void WriteActiveConfiguration( + PSLockRef aLock, JSONWriter& aWriter, + const Span& aPropertyName = MakeStringSpan("")) { + if (!sInstance) { + if (!aPropertyName.empty()) { + aWriter.NullProperty(aPropertyName); + } else { + aWriter.NullElement(); + } + return; + }; + + if (!aPropertyName.empty()) { + aWriter.StartObjectProperty(aPropertyName); + } else { + aWriter.StartObjectElement(); + } + + { + aWriter.StartArrayProperty("features"); +#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_) \ + if (profiler_feature_active(ProfilerFeature::Name_)) { \ + aWriter.StringElement(str_); \ + } + + PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES) +#undef WRITE_ACTIVE_FEATURES + aWriter.EndArray(); + } + { + aWriter.StartArrayProperty("threads"); + for (const auto& filter : sInstance->mFilters) { + aWriter.StringElement(filter); + } + aWriter.EndArray(); + } + { + // Now write all the simple values. + + // The interval is also available on profile.meta.interval + aWriter.DoubleProperty("interval", sInstance->mInterval); + aWriter.IntProperty("capacity", sInstance->mCapacity.Value()); + if (sInstance->mDuration) { + aWriter.DoubleProperty("duration", sInstance->mDuration.value()); + } + // Here, we are converting uint64_t to double. 
Tab IDs are + // being created using `nsContentUtils::GenerateProcessSpecificId`, which + // is specifically designed to only use 53 of the 64 bits to be lossless + // when passed into and out of JS as a double. + aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID); + } + aWriter.EndObject(); + } + + PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime) + + PS_GET(uint32_t, Generation) + + PS_GET(PowerOfTwo32, Capacity) + + PS_GET(Maybe, Duration) + + PS_GET(double, Interval) + + PS_GET(uint32_t, Features) + + PS_GET(uint64_t, ActiveTabID) + +#define PS_GET_FEATURE(n_, str_, Name_, desc_) \ + static bool Feature##Name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return ProfilerFeature::Has##Name_(sInstance->mFeatures); \ + } + + PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE) + +#undef PS_GET_FEATURE + + static uint32_t JSFlags(PSLockRef aLock) { + uint32_t Flags = 0; + Flags |= + FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0; + + Flags |= FeatureJSAllocations(aLock) + ? uint32_t(JSInstrumentationFlags::Allocations) + : 0; + return Flags; + } + + PS_GET(const Vector&, Filters) + PS_GET(const Vector&, FiltersLowered) + + // Not using PS_GET, because only the "Controlled" interface of + // `mProfileBufferChunkManager` should be exposed here. 
+ static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager( + PSLockRef) { + MOZ_ASSERT(sInstance); + MOZ_ASSERT(sInstance->mProfileBufferChunkManager); + return *sInstance->mProfileBufferChunkManager; + } + + static void FulfillChunkRequests(PSLockRef) { + MOZ_ASSERT(sInstance); + if (sInstance->mProfileBufferChunkManager) { + sInstance->mProfileBufferChunkManager->FulfillChunkRequests(); + } + } + + static ProfileBuffer& Buffer(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mProfileBuffer; + } + + static const Vector& LiveProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mLiveProfiledThreads; + } + + struct ProfiledThreadListElement { + TimeStamp mRegisterTime; + JSContext* mJSContext; // Null for unregistered threads. + ProfiledThreadData* mProfiledThreadData; + }; + using ProfiledThreadList = Vector; + + // Returns a ProfiledThreadList with all threads that should be included in a + // profile, both for threads that are still registered, and for threads that + // have been unregistered but still have data in the buffer. + // The returned array is sorted by thread register time. + // Do not hold on to the return value past LockedRegistry. + static ProfiledThreadList ProfiledThreads( + ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) { + MOZ_ASSERT(sInstance); + ProfiledThreadList array; + MOZ_RELEASE_ASSERT( + array.initCapacity(sInstance->mLiveProfiledThreads.length() + + sInstance->mDeadProfiledThreads.length())); + + for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) { + ProfiledThreadData* profiledThreadData = + offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData( + aLock); + if (!profiledThreadData) { + // This thread was not profiled, continue with the next one. 
+ continue; + } + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData = + offThreadRef.GetLockedRWFromAnyThread(); + MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{ + profiledThreadData->Info().RegisterTime(), + lockedThreadData->GetJSContext(), profiledThreadData})); + } + + for (auto& t : sInstance->mDeadProfiledThreads) { + MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{ + t->Info().RegisterTime(), (JSContext*)nullptr, t.get()})); + } + + std::sort(array.begin(), array.end(), + [](const ProfiledThreadListElement& a, + const ProfiledThreadListElement& b) { + return a.mRegisterTime < b.mRegisterTime; + }); + return array; + } + + static Vector> ProfiledPages(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + Vector> array; + for (auto& d : CorePS::RegisteredPages(aLock)) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + for (auto& d : sInstance->mDeadProfiledPages) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + // We don't need to sort the pages like threads since we won't show them + // as a list. + return array; + } + + static ProfiledThreadData* AddLiveProfiledThread( + PSLockRef, UniquePtr&& aProfiledThreadData) { + MOZ_ASSERT(sInstance); + MOZ_RELEASE_ASSERT(sInstance->mLiveProfiledThreads.append( + LiveProfiledThreadData{std::move(aProfiledThreadData)})); + + // Return a weak pointer to the ProfiledThreadData object. + return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get(); + } + + static void UnregisterThread(PSLockRef aLockRef, + ProfiledThreadData* aProfiledThreadData) { + MOZ_ASSERT(sInstance); + + DiscardExpiredDeadProfiledThreads(aLockRef); + + // Find the right entry in the mLiveProfiledThreads array and remove the + // element, moving the ProfiledThreadData object for the thread into the + // mDeadProfiledThreads array. 
+ for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) { + LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; + if (thread.mProfiledThreadData == aProfiledThreadData) { + thread.mProfiledThreadData->NotifyUnregistered( + sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append( + std::move(thread.mProfiledThreadData))); + sInstance->mLiveProfiledThreads.erase( + &sInstance->mLiveProfiledThreads[i]); + return; + } + } + } + + // This is a counter to collect process CPU utilization during profiling. + // It cannot be a raw `ProfilerCounter` because we need to manually add/remove + // it while the profiler lock is already held. + class ProcessCPUCounter final : public BaseProfilerCount { + public: + explicit ProcessCPUCounter(PSLockRef aLock) + : BaseProfilerCount("processCPU", &mCounter, nullptr, "CPU", + "Process CPU utilization") { + // Adding on construction, so it's ready before the sampler starts. + locked_profiler_add_sampled_counter(aLock, this); + // Note: Removed from ActivePS::Destroy, because a lock is needed. + } + + // Adds aNumber to the accumulated counter value. + void Add(int64_t aNumber) { mCounter += aNumber; } + + private: + ProfilerAtomicSigned mCounter; + }; + PS_GET(ProcessCPUCounter*, MaybeProcessCPUCounter); + + PS_GET(PowerCounters*, MaybePowerCounters); + + PS_GET_AND_SET(bool, IsPaused) + + // True if sampling is paused (through generic `SetIsPaused()` or specific + // `SetIsSamplingPaused()`). 
+ static bool IsSamplingPaused(PSLockRef lock) { + MOZ_ASSERT(sInstance); + return IsPaused(lock) || sInstance->mIsSamplingPaused; + } + + static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) { + MOZ_ASSERT(sInstance); + sInstance->mIsSamplingPaused = aIsSamplingPaused; + } + + static void DiscardExpiredDeadProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead threads that were unregistered before bufferRangeStart. + sInstance->mDeadProfiledThreads.eraseIf( + [bufferRangeStart]( + const UniquePtr& aProfiledThreadData) { + Maybe bufferPosition = + aProfiledThreadData->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this thread"); + return *bufferPosition < bufferRangeStart; + }); + } + + // Moves every registered page whose inner window ID equals + // aRegisteredInnerWindowID into mDeadProfiledPages, after notifying it of + // the buffer range end at unregistration time. + static void UnregisterPage(PSLockRef aLock, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + auto& registeredPages = CorePS::RegisteredPages(aLock); + for (size_t i = 0; i < registeredPages.length(); i++) { + RefPtr& page = registeredPages[i]; + if (page->InnerWindowID() == aRegisteredInnerWindowID) { + page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT( + sInstance->mDeadProfiledPages.append(std::move(page))); + // `i--` cancels the loop's `i++`, so the element now at index i is + // examined next. + registeredPages.erase(&registeredPages[i--]); + } + } + } + + static void DiscardExpiredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead pages that were unregistered before + // bufferRangeStart. 
+ sInstance->mDeadProfiledPages.eraseIf( + [bufferRangeStart](const RefPtr& aProfiledPage) { + Maybe bufferPosition = + aProfiledPage->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this page"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void ClearUnregisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mDeadProfiledPages.clear(); + } + + static void ClearExpiredExitProfiles(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard exit profiles that were gathered before our buffer RangeStart. + // If we have started to overwrite our data from when the Base profile was + // added, we should get rid of that Base profile because it's now older than + // our oldest Gecko profile data. + // + // When adding: (In practice the starting buffer should be empty) + // v Start == End + // | <-- Buffer range, initially empty. + // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it + // + // Later, still in range: + // v Start v End + // |=========| <-- Buffer range growing. + // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it + // + // Even later, now out of range: + // v Start v End + // |============| <-- Buffer range full and sliding. + // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! 
-> Discard it + if (sInstance->mBaseProfileThreads && + sInstance->mGeckoIndexWhenBaseProfileAdded + .ConvertToProfileBufferIndex() < + profiler_get_core_buffer().GetState().mRangeStart) { + DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p", + sInstance->mBaseProfileThreads.get()); + sInstance->mBaseProfileThreads.reset(); + } + sInstance->mExitProfiles.eraseIf( + [bufferRangeStart](const ExitProfile& aExitProfile) { + return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart; + }); + } + + static void AddBaseProfileThreads(PSLockRef aLock, + UniquePtr aBaseProfileThreads) { + MOZ_ASSERT(sInstance); + DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get()); + sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads); + sInstance->mGeckoIndexWhenBaseProfileAdded = + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + profiler_get_core_buffer().GetState().mRangeEnd); + } + + static UniquePtr MoveBaseProfileThreads(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p", + sInstance->mBaseProfileThreads.get()); + return std::move(sInstance->mBaseProfileThreads); + } + + static void AddExitProfile(PSLockRef aLock, const nsACString& aExitProfile) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(ExitProfile{ + nsCString(aExitProfile), sInstance->mProfileBuffer.BufferRangeEnd()})); + } + + static Vector MoveExitProfiles(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + Vector profiles; + MOZ_RELEASE_ASSERT( + profiles.initCapacity(sInstance->mExitProfiles.length())); + for (auto& profile : sInstance->mExitProfiles) { + MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON))); + } + sInstance->mExitProfiles.clear(); + return profiles; + } + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + static void 
SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) { + MOZ_ASSERT(sInstance); + + sInstance->mMemoryCounter = aMemoryCounter; + } + + static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) { + MOZ_ASSERT(sInstance); + + return sInstance->mMemoryCounter == aMemoryCounter; + } +#endif + + private: + // The singleton instance. + static ActivePS* sInstance; + + const TimeStamp mProfilingStartTime; + + // We need to track activity generations. If we didn't we could have the + // following scenario. + // + // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks + // gPSMutex, deletes the SamplerThread (which does a join). + // + // - profiler_start() runs on a different thread, locks gPSMutex, + // re-instantiates ActivePS, unlocks gPSMutex -- all before the join + // completes. + // + // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated, + // and continues as if the start/stop pair didn't occur. Also + // profiler_stop() is stuck, unable to finish. + // + // By checking ActivePS *and* the generation, we can avoid this scenario. + // sNextGeneration is used to track the next generation number; it is static + // because it must persist across different ActivePS instantiations. + const uint32_t mGeneration; + static uint32_t sNextGeneration; + + // The maximum number of entries in mProfileBuffer. + const PowerOfTwo32 mCapacity; + + // The maximum duration of entries in mProfileBuffer, in seconds. + const Maybe mDuration; + + // The interval between samples, measured in milliseconds. + const double mInterval; + + // The profile features that are enabled. + const uint32_t mFeatures; + + // Substrings of names of threads we want to profile. + Vector mFilters; + Vector mFiltersLowered; + + // ID of the active browser screen's active tab. + // It's being used to determine the profiled tab. It's "0" if we failed to + // get the ID. + const uint64_t mActiveTabID; + + // The chunk manager used by `mProfileBuffer` below. 
+ // May become null if it gets transferred out of the Gecko Profiler. + UniquePtr mProfileBufferChunkManager; + + // The buffer into which all samples are recorded. + ProfileBuffer mProfileBuffer; + + // ProfiledThreadData objects for any threads that were profiled at any point + // during this run of the profiler: + // - mLiveProfiledThreads contains all threads that are still registered, and + // - mDeadProfiledThreads contains all threads that have already been + // unregistered but for which there is still data in the profile buffer. + Vector mLiveProfiledThreads; + Vector> mDeadProfiledThreads; + + // Info on all the dead pages. + // Registered pages are being moved to this array after unregistration. + // We are keeping them in case we need them in the profile data. + // We are removing them when we ensure that we won't need them anymore. + Vector> mDeadProfiledPages; + + // Used to collect process CPU utilization values, if the feature is on. + // Raw owning pointer: removed from the sampled counters and deleted in + // Destroy(). + ProcessCPUCounter* mMaybeProcessCPUCounter; + + // Used to collect power use data, if the power feature is on. + // Raw owning pointer: its counters are removed and it is deleted in + // Destroy(). + PowerCounters* mMaybePowerCounters; + + // The current sampler thread. This class is not responsible for destroying + // the SamplerThread object; the Destroy() method returns it so the caller + // can destroy it. + SamplerThread* const mSamplerThread; + + // Is the profiler fully paused? + bool mIsPaused; + + // Is the profiler periodic sampling paused? + bool mIsSamplingPaused; + + // Optional startup profile thread array from BaseProfiler. 
+ UniquePtr mBaseProfileThreads; + ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded; + + struct ExitProfile { + nsCString mJSON; + uint64_t mBufferPositionAtGatherTime; + }; + Vector mExitProfiles; + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + Atomic mMemoryCounter; +#endif +}; + +ActivePS* ActivePS::sInstance = nullptr; +uint32_t ActivePS::sNextGeneration = 0; + +#undef PS_GET +#undef PS_GET_LOCKLESS +#undef PS_GET_AND_SET + +using ProfilerStateChangeMutex = + mozilla::baseprofiler::detail::BaseProfilerMutex; +using ProfilerStateChangeLock = + mozilla::baseprofiler::detail::BaseProfilerAutoLock; +static ProfilerStateChangeMutex gProfilerStateChangeMutex; + +// A state-change callback, together with the set of profiling states it +// listens to and the identifier used to remove it later. +struct IdentifiedProfilingStateChangeCallback { + ProfilingStateSet mProfilingStateSet; + ProfilingStateChangeCallback mProfilingStateChangeCallback; + uintptr_t mUniqueIdentifier; + + explicit IdentifiedProfilingStateChangeCallback( + ProfilingStateSet aProfilingStateSet, + ProfilingStateChangeCallback&& aProfilingStateChangeCallback, + uintptr_t aUniqueIdentifier) + : mProfilingStateSet(aProfilingStateSet), + // The named rvalue-reference parameter is an lvalue here; move it + // explicitly so the callback is not copied. + mProfilingStateChangeCallback( + std::move(aProfilingStateChangeCallback)), + mUniqueIdentifier(aUniqueIdentifier) {} +}; +using IdentifiedProfilingStateChangeCallbackUPtr = + UniquePtr; + +static Vector + mIdentifiedProfilingStateChangeCallbacks; + +void profiler_add_state_change_callback( + ProfilingStateSet aProfilingStateSet, + ProfilingStateChangeCallback&& aCallback, + uintptr_t aUniqueIdentifier /* = 0 */) { + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + ProfilerStateChangeLock lock(gProfilerStateChangeMutex); + +#ifdef DEBUG + // Check if a non-zero id is not already used. But forgive it in non-DEBUG + // builds; in the worst case they may get removed too early. 
+ if (aUniqueIdentifier != 0) { + for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback : + mIdentifiedProfilingStateChangeCallbacks) { + MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier); + } + } +#endif // DEBUG + + if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) && + profiler_is_active()) { + aCallback(ProfilingState::AlreadyActive); + } + + (void)mIdentifiedProfilingStateChangeCallbacks.append( + MakeUnique( + aProfilingStateSet, std::move(aCallback), aUniqueIdentifier)); +} + +// Remove the callback with the given identifier. +void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) { + MOZ_ASSERT(aUniqueIdentifier != 0); + if (aUniqueIdentifier == 0) { + // Forgive zero in non-DEBUG builds. + return; + } + + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + ProfilerStateChangeLock lock(gProfilerStateChangeMutex); + + mIdentifiedProfilingStateChangeCallbacks.eraseIf( + [aUniqueIdentifier]( + const IdentifiedProfilingStateChangeCallbackUPtr& aIdedCallback) { + if (aIdedCallback->mUniqueIdentifier != aUniqueIdentifier) { + return false; + } + if (aIdedCallback->mProfilingStateSet.contains( + ProfilingState::RemovingCallback)) { + aIdedCallback->mProfilingStateChangeCallback( + ProfilingState::RemovingCallback); + } + return true; + }); +} + +static void invoke_profiler_state_change_callbacks( + ProfilingState aProfilingState) { + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + ProfilerStateChangeLock lock(gProfilerStateChangeMutex); + + for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback : + mIdentifiedProfilingStateChangeCallbacks) { + if (idedCallback->mProfilingStateSet.contains(aProfilingState)) { + idedCallback->mProfilingStateChangeCallback(aProfilingState); + } + } +} + +Atomic RacyFeatures::sActiveAndFeatures(0); + +// The name of the main thread. 
+static const char* const kMainThreadName = "GeckoMain"; + +//////////////////////////////////////////////////////////////////////// +// BEGIN sampling/unwinding code + +// The registers used for stack unwinding and a few other sampling purposes. +// The ctor only null-initializes mPC/mSP/mFP/mLR (the ucontext members, where +// present, are not initialized by it); users are responsible for filling in +// the fields. +class Registers { + public: + Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {} + + // Zero-fills the whole object, including the ucontext members where present. + void Clear() { memset(this, 0, sizeof(*this)); } + + // These fields are filled in by + // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace + // samples, and by REGISTERS_SYNC_POPULATE for synchronous samples. + Address mPC; // Instruction pointer. + Address mSP; // Stack pointer. + Address mFP; // Frame pointer. + Address mLR; // ARM link register. +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // This contains all the registers, which means it duplicates the four fields + // above. This is ok. + ucontext_t* mContext; // The context from the signal handler or below. + ucontext_t mContextSyncStorage; // Storage for sync stack unwinding. +#endif +}; + +// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time +// looping on corrupted stacks. +static const size_t MAX_NATIVE_FRAMES = 1024; + +// Parallel arrays of program counters and stack pointers for up to +// MAX_NATIVE_FRAMES native frames; starts out empty and zero-initialized. +struct NativeStack { + void* mPCs[MAX_NATIVE_FRAMES]; + void* mSPs[MAX_NATIVE_FRAMES]; + size_t mCount; // Number of frames filled. + + NativeStack() : mPCs(), mSPs(), mCount(0) {} +}; + +Atomic WALKING_JS_STACK(false); + +// Scoped guard around WALKING_JS_STACK: walkAllowed is true only if this +// object flipped the flag from false to true, in which case the destructor +// resets the flag to false. +struct AutoWalkJSStack { + bool walkAllowed; + + AutoWalkJSStack() : walkAllowed(false) { + walkAllowed = WALKING_JS_STACK.compareExchange(false, true); + } + + ~AutoWalkJSStack() { + if (walkAllowed) { + WALKING_JS_STACK = false; + } + } +}; + +class StackWalkControl { + public: + struct ResumePoint { + // If lost, the stack walker should resume at these values. + void* resumeSp; // If null, stop the walker here, don't resume again. 
+ void* resumeBp; + void* resumePc; + }; + +#if ((defined(USE_MOZ_STACK_WALK) || defined(USE_FRAME_POINTER_STACK_WALK)) && \ + defined(GP_ARCH_amd64)) + public: + static constexpr bool scIsSupported = true; + + void Clear() { mResumePointCount = 0; } + + size_t ResumePointCount() const { return mResumePointCount; } + + static constexpr size_t MaxResumePointCount() { + return scMaxResumePointCount; + } + + // Add a resume point. Note that adding anything past MaxResumePointCount() + // would silently fail. In practice this means that stack walking may still + // lose native frames. + void AddResumePoint(ResumePoint&& aResumePoint) { + // If SP is null, we expect BP and PC to also be null. + MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp); + MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc); + + // If BP and/or PC are not null, SP must not be null. (But we allow BP/PC to + // be null even if SP is not null.) + MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp); + MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp); + + if (mResumePointCount < scMaxResumePointCount) { + mResumePoint[mResumePointCount] = std::move(aResumePoint); + ++mResumePointCount; + } + } + + // Only allow non-modifying range-for loops. + const ResumePoint* begin() const { return &mResumePoint[0]; } + const ResumePoint* end() const { return &mResumePoint[mResumePointCount]; } + + // Find the next resume point that would be a caller of the function with the + // given SP; i.e., the resume point with the closest resumeSp > aSp. + const ResumePoint* GetResumePointCallingSp(void* aSp) const { + const ResumePoint* callingResumePoint = nullptr; + for (const ResumePoint& resumePoint : *this) { + if (resumePoint.resumeSp && // This is a potential resume point. + resumePoint.resumeSp > aSp && // It is a caller of the given SP. + (!callingResumePoint || // This is the first candidate. + resumePoint.resumeSp < callingResumePoint->resumeSp) // Or better. 
+ ) { + callingResumePoint = &resumePoint; + } + } + return callingResumePoint; + } + + private: + size_t mResumePointCount = 0; + static constexpr size_t scMaxResumePointCount = 32; + ResumePoint mResumePoint[scMaxResumePointCount]; + +#else + public: + static constexpr bool scIsSupported = false; + // Discarded constexpr-if statements are still checked during compilation, + // these declarations are necessary for that, even if not actually used. + void Clear(); + size_t ResumePointCount(); + static constexpr size_t MaxResumePointCount(); + void AddResumePoint(ResumePoint&& aResumePoint); + const ResumePoint* begin() const; + const ResumePoint* end() const; + const ResumePoint* GetResumePointCallingSp(void* aSp) const; +#endif +}; + +// Make a copy of the JS stack into a JSFrame array, and return the number of +// copied frames. +// This copy is necessary since, like the native stack, the JS stack is iterated +// youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks. +static uint32_t ExtractJsFrames( + bool aIsSynchronous, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, ProfilerStackCollector& aCollector, + JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) { + MOZ_ASSERT(aJsFrames, + "ExtractJsFrames should only be called if there is a " + "JsFrameBuffer to fill."); + + uint32_t jsFramesCount = 0; + + // Only walk jit stack if profiling frame iterator is turned on. 
+ JSContext* context = aThreadData.GetJSContext(); + if (context && JS::IsProfilingEnabledForContext(context)) { + AutoWalkJSStack autoWalkJSStack; + + if (autoWalkJSStack.walkAllowed) { + JS::ProfilingFrameIterator::RegisterState registerState; + registerState.pc = aRegs.mPC; + registerState.sp = aRegs.mSP; + registerState.lr = aRegs.mLR; + registerState.fp = aRegs.mFP; + + // Non-periodic sampling passes Nothing() as the buffer write position to + // ProfilingFrameIterator to avoid incorrectly resetting the buffer + // position of sampled JIT frames inside the JS engine. + Maybe samplePosInBuffer; + if (!aIsSynchronous) { + // aCollector.SamplePositionInBuffer() will return Nothing() when + // profiler_suspend_and_sample_thread is called from the background hang + // reporter. + samplePosInBuffer = aCollector.SamplePositionInBuffer(); + } + + for (JS::ProfilingFrameIterator jsIter(context, registerState, + samplePosInBuffer); + !jsIter.done(); ++jsIter) { + if (aIsSynchronous || jsIter.isWasm()) { + jsFramesCount += + jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES); + if (jsFramesCount == MAX_JS_FRAMES) { + break; + } + } else { + Maybe frame = + jsIter.getPhysicalFrameWithoutLabel(); + if (frame.isSome()) { + aJsFrames[jsFramesCount++] = std::move(frame).ref(); + if (jsFramesCount == MAX_JS_FRAMES) { + break; + } + } + } + + if constexpr (StackWalkControl::scIsSupported) { + if (aStackWalkControlIfSupported) { + jsIter.getCppEntryRegisters().apply( + [&](const JS::ProfilingFrameIterator::RegisterState& + aCppEntry) { + StackWalkControl::ResumePoint resumePoint; + resumePoint.resumeSp = aCppEntry.sp; + resumePoint.resumeBp = aCppEntry.fp; + resumePoint.resumePc = aCppEntry.pc; + aStackWalkControlIfSupported->AddResumePoint( + std::move(resumePoint)); + }); + } + } else { + MOZ_ASSERT(!aStackWalkControlIfSupported, + "aStackWalkControlIfSupported should be null when " + "!StackWalkControl::scIsSupported"); + (void)aStackWalkControlIfSupported; + } + } 
+ } + } + + return jsFramesCount; +} + +// Merges the profiling stack, native stack, and JS stack, outputting the +// details to aCollector. +static void MergeStacks( + uint32_t aFeatures, bool aIsSynchronous, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, const NativeStack& aNativeStack, + ProfilerStackCollector& aCollector, JsFrame* aJsFrames, + uint32_t aJsFramesCount) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + MOZ_ASSERT_IF(!aJsFrames, aJsFramesCount == 0); + + const ProfilingStack& profilingStack = aThreadData.ProfilingStackCRef(); + const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames; + uint32_t profilingStackFrameCount = profilingStack.stackSize(); + + // While the profiling stack array is ordered oldest-to-youngest, the JS and + // native arrays are ordered youngest-to-oldest. We must add frames to aInfo + // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS + // and native arrays backwards. Note: this means the terminating condition + // jsIndex and nativeIndex is being < 0. + uint32_t profilingStackIndex = 0; + int32_t jsIndex = aJsFramesCount - 1; + int32_t nativeIndex = aNativeStack.mCount - 1; + + uint8_t* lastLabelFrameStackAddr = nullptr; + uint8_t* jitEndStackAddr = nullptr; + + // Iterate as long as there is at least one frame remaining. + while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 || + nativeIndex >= 0) { + // There are 1 to 3 frames available. Find and add the oldest. 
+ uint8_t* profilingStackAddr = nullptr; + uint8_t* jsStackAddr = nullptr; + uint8_t* nativeStackAddr = nullptr; + uint8_t* jsActivationAddr = nullptr; + + if (profilingStackIndex != profilingStackFrameCount) { + const js::ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + if (profilingStackFrame.isLabelFrame() || + profilingStackFrame.isSpMarkerFrame()) { + lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress(); + } + + // Skip any JS_OSR frames. Such frames are used when the JS interpreter + // enters a jit frame on a loop edge (via on-stack-replacement, or OSR). + // To avoid both the profiling stack frame and jit frame being recorded + // (and showing up twice), the interpreter marks the interpreter + // profiling stack frame as JS_OSR to ensure that it doesn't get counted. + if (profilingStackFrame.isOSRFrame()) { + profilingStackIndex++; + continue; + } + + MOZ_ASSERT(lastLabelFrameStackAddr); + profilingStackAddr = lastLabelFrameStackAddr; + } + + if (jsIndex >= 0) { + jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress; + jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation; + } + + if (nativeIndex >= 0) { + nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex]; + } + + // If there's a native stack frame which has the same SP as a profiling + // stack frame, pretend we didn't see the native stack frame. Ditto for a + // native stack frame which has the same SP as a JS stack frame. In effect + // this means profiling stack frames or JS frames trump conflicting native + // frames. + if (nativeStackAddr && (profilingStackAddr == nativeStackAddr || + jsStackAddr == nativeStackAddr)) { + nativeStackAddr = nullptr; + nativeIndex--; + MOZ_ASSERT(profilingStackAddr || jsStackAddr); + } + + // Sanity checks. 
+ MOZ_ASSERT_IF(profilingStackAddr, + profilingStackAddr != jsStackAddr && + profilingStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr && + jsStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr && + nativeStackAddr != jsStackAddr); + + // Check to see if profiling stack frame is top-most. + if (profilingStackAddr > jsStackAddr && + profilingStackAddr > nativeStackAddr) { + MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount); + const js::ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + // Sp marker frames are just annotations and should not be recorded in + // the profile. + if (!profilingStackFrame.isSpMarkerFrame()) { + // The JIT only allows the top-most frame to have a nullptr pc. + MOZ_ASSERT_IF( + profilingStackFrame.isJsFrame() && profilingStackFrame.script() && + !profilingStackFrame.pc(), + &profilingStackFrame == + &profilingStack.frames[profilingStack.stackSize() - 1]); + if (aIsSynchronous && profilingStackFrame.categoryPair() == + JS::ProfilingCategoryPair::PROFILER) { + // For stacks captured synchronously (ie. marker stacks), stop + // walking the stack as soon as we enter the profiler category, + // to avoid showing profiler internal code in marker stacks. + return; + } + aCollector.CollectProfilingStackFrame(profilingStackFrame); + } + profilingStackIndex++; + continue; + } + + // Check to see if JS jit stack frame is top-most + if (jsStackAddr > nativeStackAddr) { + MOZ_ASSERT(jsIndex >= 0); + const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex]; + jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress; + // Stringifying non-wasm JIT frames is delayed until streaming time. To + // re-lookup the entry in the JitcodeGlobalTable, we need to store the + // JIT code address (OptInfoAddr) in the circular buffer. 
+ // + // Note that we cannot do this when we are synchronously sampling the + // current thread; that is, when called from profiler_get_backtrace. The + // captured backtrace is usually externally stored for an indeterminate + // amount of time, such as in nsRefreshDriver. Problematically, the + // stored backtrace may be alive across a GC during which the profiler + // itself is disabled. In that case, the JS engine is free to discard its + // JIT code. This means that if we inserted such OptInfoAddr entries into + // the buffer, nsRefreshDriver would now be holding on to a backtrace + // with stale JIT code return addresses. + if (aIsSynchronous || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) { + aCollector.CollectWasmFrame(jsFrame.label); + } else if (jsFrame.kind == + JS::ProfilingFrameIterator::Frame_BaselineInterpreter) { + // Materialize a ProfilingStackFrame similar to the C++ Interpreter. We + // also set the IS_BLINTERP_FRAME flag to differentiate though. + JSScript* script = jsFrame.interpreterScript; + jsbytecode* pc = jsFrame.interpreterPC(); + js::ProfilingStackFrame stackFrame; + constexpr uint32_t ExtraFlags = + uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME); + stackFrame.initJsFrame("", jsFrame.label, script, pc, + jsFrame.realmID); + aCollector.CollectProfilingStackFrame(stackFrame); + } else { + MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline); + aCollector.CollectJitReturnAddr(jsFrame.returnAddress()); + } + + jsIndex--; + continue; + } + + // If we reach here, there must be a native stack frame and it must be the + // greatest frame. + if (nativeStackAddr && + // If the latest JS frame was JIT, this could be the native frame that + // corresponds to it. In that case, skip the native frame, because + // there's no need for the same frame to be present twice in the stack. 
+ // The JS frame can be considered the symbolicated version of the native + // frame. + (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) && + // This might still be a JIT operation, check to make sure that is not + // in range of the NEXT JavaScript's stacks' activation address. + (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) { + MOZ_ASSERT(nativeIndex >= 0); + void* addr = (void*)aNativeStack.mPCs[nativeIndex]; + aCollector.CollectNativeLeafAddr(addr); + } + if (nativeIndex >= 0) { + nativeIndex--; + } + } + + // Update the JS context with the current profile sample buffer generation. + // + // Only do this for periodic samples. We don't want to do this for + // synchronous samples, and we also don't want to do it for calls to + // profiler_suspend_and_sample_thread() from the background hang reporter - + // in that case, aCollector.BufferRangeStart() will return Nothing(). + if (!aIsSynchronous) { + aCollector.BufferRangeStart().apply( + [&aThreadData](uint64_t aBufferRangeStart) { + JSContext* context = aThreadData.GetJSContext(); + if (context) { + JS::SetJSContextProfilerSampleBufferRangeStart(context, + aBufferRangeStart); + } + }); + } +} + +#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, + void* aClosure) { + NativeStack* nativeStack = static_cast(aClosure); + MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES); + nativeStack->mSPs[nativeStack->mCount] = aSP; + nativeStack->mPCs[nativeStack->mCount] = aPC; + nativeStack->mCount++; +} +#endif + +#if defined(USE_FRAME_POINTER_STACK_WALK) +static void DoFramePointerBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". 
+ // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Make a local copy of the Registers, to allow modifications. + Registers regs = aRegs; + + // Start with the current function. We use 0 as the frame number here because + // the FramePointerStackWalk() call below will use 1..N. This is a bit weird + // but it doesn't matter because StackWalkCallback() doesn't use the frame + // number argument. + StackWalkCallback(/* frameNum */ 0, regs.mPC, regs.mSP, &aNativeStack); + + const void* const stackEnd = aThreadData.StackTop(); + + // This is to check forward-progress after using a resume point. + void* previousResumeSp = nullptr; + + for (;;) { + if (!(regs.mSP && regs.mSP <= regs.mFP && regs.mFP <= stackEnd)) { + break; + } + FramePointerStackWalk(StackWalkCallback, + uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount), + &aNativeStack, reinterpret_cast(regs.mFP), + const_cast(stackEnd)); + + if constexpr (!StackWalkControl::scIsSupported) { + break; + } else { + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + // No room to add more frames. + break; + } + if (!aStackWalkControlIfSupported || + aStackWalkControlIfSupported->ResumePointCount() == 0) { + // No resume information. + break; + } + void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; + if (previousResumeSp && + ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { + // No progress after the previous resume point. + break; + } + const StackWalkControl::ResumePoint* resumePoint = + aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); + if (!resumePoint) { + break; + } + void* sp = resumePoint->resumeSp; + if (!sp) { + // Null SP in a resume point means we stop here. + break; + } + void* pc = resumePoint->resumePc; + StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp, + &aNativeStack); + // StackWalkCallback() has already advanced mCount past the new frame. + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + break; + } + // Prepare context to resume stack walking. 
+ regs.mPC = (Address)pc; + regs.mSP = (Address)sp; + regs.mFP = (Address)resumePoint->resumeBp; + + previousResumeSp = sp; + } + } +} +#endif + +#if defined(USE_MOZ_STACK_WALK) +static void DoMozStackWalkBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Start with the current function. We use 0 as the frame number here because + // the MozStackWalkThread() call below will use 1..N. This is a bit weird but + // it doesn't matter because StackWalkCallback() doesn't use the frame number + // argument. + StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); + + HANDLE thread = aThreadData.PlatformDataCRef().ProfiledThread(); + MOZ_ASSERT(thread); + + CONTEXT context_buf; + CONTEXT* context = nullptr; + if constexpr (StackWalkControl::scIsSupported) { + context = &context_buf; + memset(&context_buf, 0, sizeof(CONTEXT)); + context_buf.ContextFlags = CONTEXT_FULL; +# if defined(_M_AMD64) + context_buf.Rsp = (DWORD64)aRegs.mSP; + context_buf.Rbp = (DWORD64)aRegs.mFP; + context_buf.Rip = (DWORD64)aRegs.mPC; +# else + static_assert(!StackWalkControl::scIsSupported, + "Mismatched support between StackWalkControl and " + "DoMozStackWalkBacktrace"); +# endif + } else { + context = nullptr; + } + + // This is to check forward-progress after using a resume point. + void* previousResumeSp = nullptr; + + for (;;) { + MozStackWalkThread(StackWalkCallback, + uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount), + &aNativeStack, thread, context); + + if constexpr (!StackWalkControl::scIsSupported) { + break; + } else { + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + // No room to add more frames. 
+ break; + } + if (!aStackWalkControlIfSupported || + aStackWalkControlIfSupported->ResumePointCount() == 0) { + // No resume information. + break; + } + void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; + if (previousResumeSp && + ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { + // No progress after the previous resume point. + break; + } + const StackWalkControl::ResumePoint* resumePoint = + aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); + if (!resumePoint) { + break; + } + void* sp = resumePoint->resumeSp; + if (!sp) { + // Null SP in a resume point means we stop here. + break; + } + void* pc = resumePoint->resumePc; + StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp, + &aNativeStack); + // StackWalkCallback() has already advanced mCount past the new frame. + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + break; + } + // Prepare context to resume stack walking. + memset(&context_buf, 0, sizeof(CONTEXT)); + context_buf.ContextFlags = CONTEXT_FULL; +# if defined(_M_AMD64) + context_buf.Rsp = (DWORD64)sp; + context_buf.Rbp = (DWORD64)resumePoint->resumeBp; + context_buf.Rip = (DWORD64)pc; +# else + static_assert(!StackWalkControl::scIsSupported, + "Mismatched support between StackWalkControl and " + "DoMozStackWalkBacktrace"); +# endif + previousResumeSp = sp; + } + } +} +#endif + +#ifdef USE_EHABI_STACKWALK +static void DoEHABIBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + aNativeStack.mCount = EHABIStackWalk( + aRegs.mContext->uc_mcontext, const_cast(aThreadData.StackTop()), + aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES); + (void)aStackWalkControlIfSupported; // TODO: Implement. 
+} +#endif + +#ifdef USE_LUL_STACKWALK + +// See the comment at the callsite for why this function is necessary. +# if defined(MOZ_HAVE_ASAN_IGNORE) +MOZ_ASAN_IGNORE static void ASAN_memcpy(void* aDst, const void* aSrc, + size_t aLen) { + // The obvious thing to do here is call memcpy(). However, although + // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the + // false positive still manifests! So we must implement memcpy() ourselves + // within this function. + char* dst = static_cast(aDst); + const char* src = static_cast(aSrc); + + for (size_t i = 0; i < aLen; i++) { + dst[i] = src[i]; + } +} +# endif + +static void DoLULBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + (void)aStackWalkControlIfSupported; // TODO: Implement. 
+ + const mcontext_t* mc = &aRegs.mContext->uc_mcontext; + + lul::UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); + +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); +# elif defined(GP_PLAT_amd64_freebsd) + startRegs.xip = lul::TaggedUWord(mc->mc_rip); + startRegs.xsp = lul::TaggedUWord(mc->mc_rsp); + startRegs.xbp = lul::TaggedUWord(mc->mc_rbp); +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + startRegs.r15 = lul::TaggedUWord(mc->arm_pc); + startRegs.r14 = lul::TaggedUWord(mc->arm_lr); + startRegs.r13 = lul::TaggedUWord(mc->arm_sp); + startRegs.r12 = lul::TaggedUWord(mc->arm_ip); + startRegs.r11 = lul::TaggedUWord(mc->arm_fp); + startRegs.r7 = lul::TaggedUWord(mc->arm_r7); +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.x29 = lul::TaggedUWord(mc->regs[29]); + startRegs.x30 = lul::TaggedUWord(mc->regs[30]); + startRegs.sp = lul::TaggedUWord(mc->sp); +# elif defined(GP_PLAT_arm64_freebsd) + startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr); + startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]); + startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr); + startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp); +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); +# elif defined(GP_PLAT_mips64_linux) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.sp = lul::TaggedUWord(mc->gregs[29]); + startRegs.fp = lul::TaggedUWord(mc->gregs[30]); +# else +# error "Unknown plat" +# endif + + // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the + // stack's registered top 
point. Do some basic validity checks too. This + // assumes that the TaggedUWord holding the stack pointer value is valid, but + // it should be, since it was constructed that way in the code just above. + + // We could construct |stackImg| so that LUL reads directly from the stack in + // question, rather than from a copy of it. That would reduce overhead and + // space use a bit. However, it gives a problem with dynamic analysis tools + // (ASan, TSan, Valgrind) which is that such tools will report invalid or + // racing memory accesses, and such accesses will be reported deep inside LUL. + // By taking a copy here, we can either sanitise the copy (for Valgrind) or + // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have + // to try and suppress errors inside LUL. + // + // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks + // observed in some minutes of testing, whilst keeping the size of this + // function (DoLULBacktrace)'s frame reasonable. Most stacks observed in + // practice are small, 4KB or less, and so the copy costs are insignificant + // compared to other profiler overhead. + // + // |stackImg| is allocated on this (the sampling thread's) stack. That + // implies that the frame for this function is at least N_STACK_BYTES large. + // In general it would be considered unacceptable to have such a large frame + // on a stack, but it only exists for the unwinder thread, and so is not + // expected to be a problem. Allocating it on the heap is troublesome because + // this function runs whilst the sampled thread is suspended, so any heap + // allocation risks deadlock. Allocating it as a global variable is not + // thread safe, which would be a problem if we ever allow multiple sampler + // threads. Hence allocating it on the stack seems to be the least-worst + // option. 
+ + lul::StackImage stackImg; + + { +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ + defined(GP_PLAT_amd64_freebsd) + uintptr_t rEDZONE_SIZE = 128; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ + defined(GP_PLAT_arm64_freebsd) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_mips64_linux) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# else +# error "Unknown plat" +# endif + uintptr_t end = reinterpret_cast(aThreadData.StackTop()); + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = 0; + if (start < end) { + nToCopy = end - start; + if (nToCopy >= 1024u * 1024u) { + // start is abnormally far from end, possibly due to some special code + // that uses a separate stack elsewhere (e.g.: rr). In this case we just + // give up on this sample. + nToCopy = 0; + } else if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + stackImg.mLen = nToCopy; + stackImg.mStartAvma = start; + if (nToCopy > 0) { + // If this is a vanilla memcpy(), ASAN makes the following complaint: + // + // ERROR: AddressSanitizer: stack-buffer-underflow ... + // ... + // HINT: this may be a false positive if your program uses some custom + // stack unwind mechanism or swapcontext + // + // This code is very much a custom stack unwind mechanism! So we use an + // alternative memcpy() implementation that is ignored by ASAN. 
+# if defined(MOZ_HAVE_ASAN_IGNORE) + ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# else + memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# endif + (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy); + } + } + + size_t framePointerFramesAcquired = 0; + lul::LUL* lul = CorePS::Lul(); + MOZ_RELEASE_ASSERT(lul); + lul->Unwind(reinterpret_cast(aNativeStack.mPCs), + reinterpret_cast(aNativeStack.mSPs), + &aNativeStack.mCount, &framePointerFramesAcquired, + MAX_NATIVE_FRAMES, &startRegs, &stackImg); + + // Update stats in the LUL stats object. Unfortunately this requires + // three global memory operations. + lul->mStats.mContext += 1; + lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired; + lul->mStats.mFP += framePointerFramesAcquired; +} + +#endif + +#ifdef HAVE_NATIVE_UNWIND +static void DoNativeBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // This method determines which stackwalker is used for periodic and + // synchronous samples. (Backtrace samples are treated differently, see + // profiler_suspend_and_sample_thread() for details). The only part of the + // ordering that matters is that LUL must precede FRAME_POINTER, because on + // Linux they can both be present. 
+# if defined(USE_LUL_STACKWALK) + DoLULBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# elif defined(USE_EHABI_STACKWALK) + DoEHABIBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# elif defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# else +# error "Invalid configuration" +# endif +} +#endif + +// Writes some components shared by periodic and synchronous profiles to +// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample() +// and DoPeriodicSample().) +// +// The grammar for entry sequences is in a comment above +// ProfileBuffer::StreamSamplesToJSON. +static inline void DoSharedSample( + bool aIsSynchronous, uint32_t aFeatures, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos, + uint64_t aBufferRangeStart, ProfileBuffer& aBuffer, + StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(!aBuffer.IsThreadSafe(), + "Mutexes cannot be used inside this critical section"); + + ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart); + StackWalkControl* stackWalkControlIfSupported = nullptr; +#if defined(HAVE_NATIVE_UNWIND) + const bool captureNative = ProfilerFeature::HasStackWalk(aFeatures) && + aCaptureOptions == StackCaptureOptions::Full; + StackWalkControl stackWalkControl; + if constexpr (StackWalkControl::scIsSupported) { + if (captureNative) { + stackWalkControlIfSupported = &stackWalkControl; + } + } +#endif // defined(HAVE_NATIVE_UNWIND) + const uint32_t jsFramesCount = + aJsFrames ? 
ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector, + aJsFrames, stackWalkControlIfSupported) + : 0; + NativeStack nativeStack; +#if defined(HAVE_NATIVE_UNWIND) + if (captureNative) { + DoNativeBacktrace(aThreadData, aRegs, nativeStack, + stackWalkControlIfSupported); + + MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack, + collector, aJsFrames, jsFramesCount); + } else +#endif + { + MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack, + collector, aJsFrames, jsFramesCount); + + // We can't walk the whole native stack, but we can record the top frame. + if (aCaptureOptions == StackCaptureOptions::Full) { + aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC)); + } + } +} + +// Writes the components of a synchronous sample to the given ProfileBuffer. +static void DoSyncSample( + uint32_t aFeatures, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Registers& aRegs, ProfileBuffer& aBuffer, + StackCaptureOptions aCaptureOptions) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack, + "DoSyncSample should not be called when no capture is needed"); + + const uint64_t bufferRangeStart = aBuffer.BufferRangeStart(); + + const uint64_t samplePos = + aBuffer.AddThreadIdEntry(aThreadData.Info().ThreadId()); + + TimeDuration delta = aNow - CorePS::ProcessStartTime(); + aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + + if (!aThreadData.GetJSContext()) { + // No JSContext, there is no JS frame buffer (and no need for it). + DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData, + /* aJsFrames = */ nullptr, aRegs, samplePos, + bufferRangeStart, aBuffer, aCaptureOptions); + } else { + // JSContext is present, we need to lock the thread data to access the JS + // frame buffer. 
+ ThreadRegistration::WithOnThreadRef([&](ThreadRegistration::OnThreadRef + aOnThreadRef) { + aOnThreadRef.WithConstLockedRWOnThread( + [&](const ThreadRegistration::LockedRWOnThread& aLockedThreadData) { + DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData, + aLockedThreadData.GetJsFrameBuffer(), aRegs, + samplePos, bufferRangeStart, aBuffer, + aCaptureOptions); + }); + }); + } +} + +// Writes the components of a periodic sample to ActivePS's ProfileBuffer. +// The ThreadId entry is already written in the main ProfileBuffer, its location +// is `aSamplePos`, we can write the rest to `aBuffer` (which may be different). +static inline void DoPeriodicSample( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart, + ProfileBuffer& aBuffer) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock)); + + JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock); + DoSharedSample(/* aIsSynchronous = */ false, ActivePS::Features(aLock), + aThreadData, jsFrames, aRegs, aSamplePos, aBufferRangeStart, + aBuffer); +} + +// END sampling/unwinding code +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN saving/streaming code + +const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL; + +static int64_t SafeJSInteger(uint64_t aValue) { + return aValue <= kJS_MAX_SAFE_UINTEGER ? 
int64_t(aValue) : -1; +} + +static void AddSharedLibraryInfoToStream(JSONWriter& aWriter, + const SharedLibrary& aLib) { + aWriter.StartObjectElement(); + aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart())); + aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd())); + aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset())); + aWriter.StringProperty("name", NS_ConvertUTF16toUTF8(aLib.GetModuleName())); + aWriter.StringProperty("path", NS_ConvertUTF16toUTF8(aLib.GetModulePath())); + aWriter.StringProperty("debugName", + NS_ConvertUTF16toUTF8(aLib.GetDebugName())); + aWriter.StringProperty("debugPath", + NS_ConvertUTF16toUTF8(aLib.GetDebugPath())); + aWriter.StringProperty("breakpadId", aLib.GetBreakpadId()); + aWriter.StringProperty("codeId", aLib.GetCodeId()); + aWriter.StringProperty("arch", aLib.GetArch()); + aWriter.EndObject(); +} + +void AppendSharedLibraries(JSONWriter& aWriter, + const SharedLibraryInfo& aInfo) { + for (size_t i = 0; i < aInfo.GetSize(); i++) { + AddSharedLibraryInfoToStream(aWriter, aInfo.GetEntry(i)); + } +} + +static void StreamCategories(SpliceableJSONWriter& aWriter) { + // Same order as ProfilingCategory. Format: + // [ + // { + // name: "Idle", + // color: "transparent", + // subcategories: ["Other"], + // }, + // { + // name: "Other", + // color: "grey", + // subcategories: [ + // "JSM loading", + // "Subprocess launching", + // "DLL loading" + // ] + // }, + // ... 
+ // ] + +#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \ + aWriter.Start(); \ + aWriter.StringProperty("name", labelAsString); \ + aWriter.StringProperty("color", color); \ + aWriter.StartArrayProperty("subcategories"); +#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \ + aWriter.StringElement(labelAsString); +#define CATEGORY_JSON_END_CATEGORY \ + aWriter.EndArray(); \ + aWriter.EndObject(); + + MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY, + CATEGORY_JSON_SUBCATEGORY, + CATEGORY_JSON_END_CATEGORY) + +#undef CATEGORY_JSON_BEGIN_CATEGORY +#undef CATEGORY_JSON_SUBCATEGORY +#undef CATEGORY_JSON_END_CATEGORY +} + +static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) { + // Get an array view with all registered marker-type-specific functions. + base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList + markerTypeFunctionsArray; + // List of streamed marker names, this is used to spot duplicates. + std::set names; + // Stream the display schema for each different one. (Duplications may come + // from the same code potentially living in different libraries.) + for (const auto& markerTypeFunctions : markerTypeFunctionsArray) { + auto name = markerTypeFunctions.mMarkerTypeNameFunction(); + // std::set.insert(T&&) returns a pair, its `second` is true if the element + // was actually inserted (i.e., it was not there yet.) + const bool didInsert = + names.insert(std::string(name.data(), name.size())).second; + if (didInsert) { + markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name); + } + } + + // Now stream the Rust marker schemas. Passing the names set as a void pointer + // as well, so we can continue checking if the schemas are added already on + // the Rust side. + profiler::ffi::gecko_profiler_stream_marker_schemas( + &aWriter, static_cast(&names)); +} + +// Some meta information that is better recorded before streaming the profile. 
+// This is *not* intended to be cached, as some values could change between +// profiling sessions. +struct PreRecordedMetaInformation { + bool mAsyncStacks; + + // This struct should only live on the stack, so it's fine to use Auto + // strings. + nsAutoCString mHttpPlatform; + nsAutoCString mHttpOscpu; + nsAutoCString mHttpMisc; + + nsAutoCString mRuntimeABI; + nsAutoCString mRuntimeToolkit; + + nsAutoCString mAppInfoProduct; + nsAutoCString mAppInfoAppBuildID; + nsAutoCString mAppInfoSourceURL; + + int32_t mProcessInfoCpuCount; + int32_t mProcessInfoCpuCores; + nsAutoCString mProcessInfoCpuName; +}; + +// This function should be called out of the profiler lock. +// It gathers non-trivial data that doesn't require the profiler to stop, or for +// which the request could theoretically deadlock if the profiler is locked. +static PreRecordedMetaInformation PreRecordMetaInformation() { + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + + PreRecordedMetaInformation info = {}; // Aggregate-init all fields. + + if (!NS_IsMainThread()) { + // Leave these properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. + return info; + } + + info.mAsyncStacks = Preferences::GetBool("javascript.options.asyncstack"); + + nsresult res; + + if (nsCOMPtr http = + do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res); + !NS_FAILED(res) && http) { + Unused << http->GetPlatform(info.mHttpPlatform); + +#if defined(GP_OS_darwin) + // On Mac, the http "oscpu" is capped at 10.15, so we need to get the real + // OS version directly. 
+ int major = 0; + int minor = 0; + int bugfix = 0; + nsCocoaFeatures::GetSystemVersion(major, minor, bugfix); + if (major != 0) { + info.mHttpOscpu.AppendLiteral("macOS "); + info.mHttpOscpu.AppendInt(major); + info.mHttpOscpu.AppendLiteral("."); + info.mHttpOscpu.AppendInt(minor); + info.mHttpOscpu.AppendLiteral("."); + info.mHttpOscpu.AppendInt(bugfix); + } else +#endif +#if defined(GP_OS_windows) + // On Windows, the http "oscpu" is capped at Windows 10, so we need to get + // the real OS version directly. + OSVERSIONINFO ovi = {sizeof(OSVERSIONINFO)}; + if (GetVersionEx(&ovi)) { + info.mHttpOscpu.AppendLiteral("Windows "); + // The major version returned for Windows 11 is 10, but we can + // identify it from the build number. + info.mHttpOscpu.AppendInt( + ovi.dwBuildNumber >= 22000 ? 11 : int32_t(ovi.dwMajorVersion)); + info.mHttpOscpu.AppendLiteral("."); + info.mHttpOscpu.AppendInt(int32_t(ovi.dwMinorVersion)); +# if defined(_ARM64_) + info.mHttpOscpu.AppendLiteral(" Arm64"); +# endif + info.mHttpOscpu.AppendLiteral("; build="); + info.mHttpOscpu.AppendInt(int32_t(ovi.dwBuildNumber)); + } else +#endif + { + Unused << http->GetOscpu(info.mHttpOscpu); + } + + // Firefox version is capped to 109.0 in the http "misc" field due to some + // webcompat issues (Bug 1805967). We need to put the real version instead. + info.mHttpMisc.AssignLiteral("rv:"); + info.mHttpMisc.AppendLiteral(MOZILLA_UAVERSION); + } + + if (nsCOMPtr runtime = + do_GetService("@mozilla.org/xre/runtime;1"); + runtime) { + Unused << runtime->GetXPCOMABI(info.mRuntimeABI); + Unused << runtime->GetWidgetToolkit(info.mRuntimeToolkit); + } + + if (nsCOMPtr appInfo = + do_GetService("@mozilla.org/xre/app-info;1"); + appInfo) { + Unused << appInfo->GetName(info.mAppInfoProduct); + Unused << appInfo->GetAppBuildID(info.mAppInfoAppBuildID); + Unused << appInfo->GetSourceURL(info.mAppInfoSourceURL); + } + + ProcessInfo processInfo = {}; // Aggregate-init all fields to false/zeroes. 
+ if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) { + info.mProcessInfoCpuCount = processInfo.cpuCount; + info.mProcessInfoCpuCores = processInfo.cpuCores; + info.mProcessInfoCpuName = processInfo.cpuName; + } + + return info; +} + +// Implemented in platform-specific cpps, to add object properties describing +// the units of CPU measurements in samples. +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter); + +static void StreamMetaJSCustomObject( + PSLockRef aLock, SpliceableJSONWriter& aWriter, bool aIsShuttingDown, + const PreRecordedMetaInformation& aPreRecordedMetaInformation) { + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + aWriter.IntProperty("version", 27); + + // The "startTime" field holds the number of milliseconds since midnight + // January 1, 1970 GMT. This grotty code computes (Now - (Now - + // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form. + // Note: This is the only absolute time in the profile! All other timestamps + // are relative to this startTime. 
+ TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime(); + aWriter.DoubleProperty( + "startTime", + static_cast(PR_Now() / 1000.0 - delta.ToMilliseconds())); + + aWriter.DoubleProperty("profilingStartTime", (ActivePS::ProfilingStartTime() - + CorePS::ProcessStartTime()) + .ToMilliseconds()); + + if (const TimeStamp contentEarliestTime = + ActivePS::Buffer(aLock) + .UnderlyingChunkedBuffer() + .GetEarliestChunkStartTimeStamp(); + !contentEarliestTime.IsNull()) { + aWriter.DoubleProperty( + "contentEarliestTime", + (contentEarliestTime - CorePS::ProcessStartTime()).ToMilliseconds()); + } else { + aWriter.NullProperty("contentEarliestTime"); + } + + const double profilingEndTime = profiler_time(); + aWriter.DoubleProperty("profilingEndTime", profilingEndTime); + + if (aIsShuttingDown) { + aWriter.DoubleProperty("shutdownTime", profilingEndTime); + } else { + aWriter.NullProperty("shutdownTime"); + } + + aWriter.StartArrayProperty("categories"); + StreamCategories(aWriter); + aWriter.EndArray(); + + aWriter.StartArrayProperty("markerSchema"); + StreamMarkerSchema(aWriter); + aWriter.EndArray(); + + ActivePS::WriteActiveConfiguration(aLock, aWriter, + MakeStringSpan("configuration")); + + if (!NS_IsMainThread()) { + // Leave the rest of the properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. + return; + } + + aWriter.DoubleProperty("interval", ActivePS::Interval(aLock)); + aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock)); + +#ifdef DEBUG + aWriter.IntProperty("debug", 1); +#else + aWriter.IntProperty("debug", 0); +#endif + + aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 
1 : 0); + + aWriter.IntProperty("asyncstack", aPreRecordedMetaInformation.mAsyncStacks); + + aWriter.IntProperty("processType", XRE_GetProcessType()); + + aWriter.StringProperty("updateChannel", MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL)); + + if (!aPreRecordedMetaInformation.mHttpPlatform.IsEmpty()) { + aWriter.StringProperty("platform", + aPreRecordedMetaInformation.mHttpPlatform); + } + if (!aPreRecordedMetaInformation.mHttpOscpu.IsEmpty()) { + aWriter.StringProperty("oscpu", aPreRecordedMetaInformation.mHttpOscpu); + } + if (!aPreRecordedMetaInformation.mHttpMisc.IsEmpty()) { + aWriter.StringProperty("misc", aPreRecordedMetaInformation.mHttpMisc); + } + + if (!aPreRecordedMetaInformation.mRuntimeABI.IsEmpty()) { + aWriter.StringProperty("abi", aPreRecordedMetaInformation.mRuntimeABI); + } + if (!aPreRecordedMetaInformation.mRuntimeToolkit.IsEmpty()) { + aWriter.StringProperty("toolkit", + aPreRecordedMetaInformation.mRuntimeToolkit); + } + + if (!aPreRecordedMetaInformation.mAppInfoProduct.IsEmpty()) { + aWriter.StringProperty("product", + aPreRecordedMetaInformation.mAppInfoProduct); + } + if (!aPreRecordedMetaInformation.mAppInfoAppBuildID.IsEmpty()) { + aWriter.StringProperty("appBuildID", + aPreRecordedMetaInformation.mAppInfoAppBuildID); + } + if (!aPreRecordedMetaInformation.mAppInfoSourceURL.IsEmpty()) { + aWriter.StringProperty("sourceURL", + aPreRecordedMetaInformation.mAppInfoSourceURL); + } + + if (!aPreRecordedMetaInformation.mProcessInfoCpuName.IsEmpty()) { + aWriter.StringProperty("CPUName", + aPreRecordedMetaInformation.mProcessInfoCpuName); + } + if (aPreRecordedMetaInformation.mProcessInfoCpuCores > 0) { + aWriter.IntProperty("physicalCPUs", + aPreRecordedMetaInformation.mProcessInfoCpuCores); + } + if (aPreRecordedMetaInformation.mProcessInfoCpuCount > 0) { + aWriter.IntProperty("logicalCPUs", + aPreRecordedMetaInformation.mProcessInfoCpuCount); + } + +#if defined(GP_OS_android) + jni::String::LocalRef deviceInformation = + 
java::GeckoJavaSampler::GetDeviceInformation(); + aWriter.StringProperty("device", deviceInformation->ToCString()); +#endif + + aWriter.StartObjectProperty("sampleUnits"); + { + aWriter.StringProperty("time", "ms"); + aWriter.StringProperty("eventDelay", "ms"); + StreamMetaPlatformSampleUnits(aLock, aWriter); + } + aWriter.EndObject(); + + // We should avoid collecting extension metadata for profiler when there is no + // observer service, since a ExtensionPolicyService could not be created then. + if (nsCOMPtr os = services::GetObserverService()) { + aWriter.StartObjectProperty("extensions"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("id"); + schema.WriteField("name"); + schema.WriteField("baseURL"); + } + + aWriter.StartArrayProperty("data"); + { + nsTArray> exts; + ExtensionPolicyService::GetSingleton().GetAll(exts); + + for (auto& ext : exts) { + aWriter.StartArrayElement(); + + nsAutoString id; + ext->GetId(id); + aWriter.StringElement(NS_ConvertUTF16toUTF8(id)); + + aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name())); + + auto url = ext->GetURL(u""_ns); + if (url.isOk()) { + aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap())); + } + + aWriter.EndArray(); + } + } + aWriter.EndArray(); + } + aWriter.EndObject(); + } +} + +static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + ActivePS::DiscardExpiredPages(aLock); + for (const auto& page : ActivePS::ProfiledPages(aLock)) { + page->StreamJSON(aWriter); + } +} + +#if defined(GP_OS_android) +template +static bool StartsWith(const nsACString& string, const char (&prefix)[N]) { + if (N - 1 > string.Length()) { + return false; + } + return memcmp(string.Data(), prefix, N - 1) == 0; +} + +static JS::ProfilingCategoryPair InferJavaCategory(nsACString& aName) { + if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) { + return JS::ProfilingCategoryPair::IDLE; + } + if 
(aName.EqualsLiteral("java.lang.Object.wait()")) { + return JS::ProfilingCategoryPair::JAVA_BLOCKED; + } + if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) { + return JS::ProfilingCategoryPair::JAVA_ANDROID; + } + if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) { + return JS::ProfilingCategoryPair::JAVA_MOZILLA; + } + if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") || + StartsWith(aName, "com.sun.")) { + return JS::ProfilingCategoryPair::JAVA_LANGUAGE; + } + if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) { + return JS::ProfilingCategoryPair::JAVA_KOTLIN; + } + if (StartsWith(aName, "androidx.")) { + return JS::ProfilingCategoryPair::JAVA_ANDROIDX; + } + return JS::ProfilingCategoryPair::OTHER; +} + +// Marker type for Java markers without any details. +struct JavaMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("Java"); + } + static void StreamJSONMarkerData( + baseprofiler::SpliceableJSONWriter& aWriter) {} + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart, + MS::Location::MarkerTable}; + schema.SetAllLabels("{marker.name}"); + return schema; + } +}; + +// Marker type for Java markers with a detail field. +struct JavaMarkerWithDetails { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("JavaWithDetails"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + const ProfilerString8View& aText) { + // This (currently) needs to be called "name" to be searchable on the + // front-end. 
+ aWriter.StringProperty("name", aText); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart, + MS::Location::MarkerTable}; + schema.SetTooltipLabel("{marker.name}"); + schema.SetChartLabel("{marker.data.name}"); + schema.SetTableLabel("{marker.name} - {marker.data.name}"); + schema.AddKeyLabelFormatSearchable("name", "Details", MS::Format::String, + MS::Searchable::Searchable); + return schema; + } +}; + +static void CollectJavaThreadProfileData( + nsTArray& javaThreads, + ProfileBuffer& aProfileBuffer) { + // Retrieve metadata about the threads. + const auto threadCount = java::GeckoJavaSampler::GetRegisteredThreadCount(); + for (int i = 0; i < threadCount; i++) { + javaThreads.AppendElement( + java::GeckoJavaSampler::GetRegisteredThreadInfo(i)); + } + + // locked_profiler_start uses a sample count of 1000 for the Java thread. + // This entry size is enough for now, but we might have to estimate it + // if it becomes customizable. + // Pass the samples + int sampleId = 0; + while (true) { + const auto threadId = java::GeckoJavaSampler::GetThreadId(sampleId); + double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId); + if (threadId == 0 || sampleTime == 0.0) { + break; + } + + aProfileBuffer.AddThreadIdEntry(ProfilerThreadId::FromNumber(threadId)); + aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime)); + int frameId = 0; + while (true) { + jni::String::LocalRef frameName = + java::GeckoJavaSampler::GetFrameName(sampleId, frameId++); + if (!frameName) { + break; + } + nsCString frameNameString = frameName->ToCString(); + + auto categoryPair = InferJavaCategory(frameNameString); + aProfileBuffer.CollectCodeLocation("", frameNameString.get(), 0, 0, + Nothing(), Nothing(), + Some(categoryPair)); + } + sampleId++; + } + + // Pass the markers now + while (true) { + // Gets the data from the Android UI thread only.
+ java::GeckoJavaSampler::Marker::LocalRef marker = + java::GeckoJavaSampler::PollNextMarker(); + if (!marker) { + // All markers are transferred. + break; + } + + // Get all the marker information from the Java thread using JNI. + const auto threadId = ProfilerThreadId::FromNumber(marker->GetThreadId()); + nsCString markerName = marker->GetMarkerName()->ToCString(); + jni::String::LocalRef text = marker->GetMarkerText(); + TimeStamp startTime = + CorePS::ProcessStartTime() + + TimeDuration::FromMilliseconds(marker->GetStartTime()); + + double endTimeMs = marker->GetEndTime(); + // A marker can be either a duration with start and end, or a point in time + // with only startTime. If endTime is 0, this means it's a point in time. + TimeStamp endTime = endTimeMs == 0 + ? startTime + : CorePS::ProcessStartTime() + + TimeDuration::FromMilliseconds(endTimeMs); + MarkerTiming timing = endTimeMs == 0 + ? MarkerTiming::InstantAt(startTime) + : MarkerTiming::Interval(startTime, endTime); + + if (!text) { + // This marker doesn't have a text. + AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName, + geckoprofiler::category::JAVA_ANDROID, + {MarkerThreadId(threadId), std::move(timing)}, + JavaMarker{}); + } else { + // This marker has a text. + AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName, + geckoprofiler::category::JAVA_ANDROID, + {MarkerThreadId(threadId), std::move(timing)}, + JavaMarkerWithDetails{}, text->ToCString()); + } + } +} +#endif + +UniquePtr +profiler_code_address_service_for_presymbolication() { + static const bool preSymbolicate = []() { + const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE"); + return symbolicate && symbolicate[0] != '\0'; + }(); + return preSymbolicate ? 
MakeUnique() : nullptr; +} + +static ProfilerResult +locked_profiler_stream_json_for_this_process( + PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime, + const PreRecordedMetaInformation& aPreRecordedMetaInformation, + bool aIsShuttingDown, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + LOG("locked_profiler_stream_json_for_this_process"); + +#ifdef DEBUG + PRIntervalTime slowWithSleeps = 0; + if (!XRE_IsParentProcess()) { + for (const auto& filter : ActivePS::Filters(aLock)) { + if (filter == "test-debug-child-slow-json") { + LOG("test-debug-child-slow-json"); + // There are 10 slow-downs below, each will sleep 250ms, for a total of + // 2.5s, which should trigger the first progress request after 1s, and + // the next progress which will have advanced further, so this profile + // shouldn't get dropped. + slowWithSleeps = PR_MillisecondsToInterval(250); + } else if (filter == "test-debug-child-very-slow-json") { + LOG("test-debug-child-very-slow-json"); + // Wait for more than 2s without any progress, which should get this + // profile discarded. + PR_Sleep(PR_SecondsToInterval(5)); + } + } + } +# define SLOW_DOWN_FOR_TESTING() \ + if (slowWithSleeps != 0) { \ + DEBUG_LOG("progress=%.0f%%, sleep...", \ + aProgressLogger.GetGlobalProgress().ToDouble() * 100.0); \ + PR_Sleep(slowWithSleeps); \ + } +#else // #ifdef DEBUG +# define SLOW_DOWN_FOR_TESTING() /* No slow-downs */ +#endif // #ifdef DEBUG #else + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + AUTO_PROFILER_STATS(locked_profiler_stream_json_for_this_process); + + const double collectionStartMs = profiler_time(); + + ProfileBuffer& buffer = ActivePS::Buffer(aLock); + + aProgressLogger.SetLocalProgress(1_pc, "Locked profile buffer"); + + SLOW_DOWN_FOR_TESTING(); + + // If there is a set "Window length", discard older data. 
+ Maybe durationS = ActivePS::Duration(aLock); + if (durationS.isSome()) { + const double durationStartMs = collectionStartMs - *durationS * 1000; + buffer.DiscardSamplesBeforeTime(durationStartMs); + } + aProgressLogger.SetLocalProgress(2_pc, "Discarded old data"); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + SLOW_DOWN_FOR_TESTING(); + +#if defined(GP_OS_android) + // Java thread profile data should be collected before serializing the meta + // object. This is because Java thread adds some markers with marker schema + // objects. And these objects should be added before the serialization of the + // `profile.meta.markerSchema` array, so these marker schema objects can also + // be serialized properly. That's why java thread profile data needs to be + // done before everything. + + // We are allocating it chunk by chunk. So this will not allocate 64 MiB + // at once. This size should be more than enough for java threads. + // This buffer is being created for each process but Android has + // relatively fewer processes compared to desktop, so it's okay here. 
+ mozilla::ProfileBufferChunkManagerWithLocalLimit javaChunkManager( + 64 * 1024 * 1024, 1024 * 1024); + ProfileChunkedBuffer javaBufferManager( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, javaChunkManager); + ProfileBuffer javaBuffer(javaBufferManager); + + nsTArray javaThreads; + + if (ActivePS::FeatureJava(aLock)) { + CollectJavaThreadProfileData(javaThreads, javaBuffer); + aProgressLogger.SetLocalProgress(3_pc, "Collected Java thread"); + } +#endif + + // Put shared library info + aWriter.StartArrayProperty("libs"); + SharedLibraryInfo sharedLibraryInfo = SharedLibraryInfo::GetInfoForSelf(); + sharedLibraryInfo.SortByAddress(); + AppendSharedLibraries(aWriter, sharedLibraryInfo); + aWriter.EndArray(); + aProgressLogger.SetLocalProgress(4_pc, "Wrote library information"); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + SLOW_DOWN_FOR_TESTING(); + + // Put meta data + aWriter.StartObjectProperty("meta"); + { + StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown, + aPreRecordedMetaInformation); + } + aWriter.EndObject(); + aProgressLogger.SetLocalProgress(5_pc, "Wrote profile metadata"); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + SLOW_DOWN_FOR_TESTING(); + + // Put page data + aWriter.StartArrayProperty("pages"); + { StreamPages(aLock, aWriter); } + aWriter.EndArray(); + aProgressLogger.SetLocalProgress(6_pc, "Wrote pages"); + + buffer.StreamProfilerOverheadToJSON( + aWriter, CorePS::ProcessStartTime(), aSinceTime, + aProgressLogger.CreateSubLoggerTo(10_pc, "Wrote profiler overheads")); + + buffer.StreamCountersToJSON( + aWriter, CorePS::ProcessStartTime(), aSinceTime, + aProgressLogger.CreateSubLoggerTo(14_pc, "Wrote counters")); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + SLOW_DOWN_FOR_TESTING(); + + // Lists the samples for each thread profile + aWriter.StartArrayProperty("threads"); + { + 
ActivePS::DiscardExpiredDeadProfiledThreads(aLock); + aProgressLogger.SetLocalProgress(15_pc, "Discarded expired profiles"); + + ThreadRegistry::LockedRegistry lockedRegistry; + ActivePS::ProfiledThreadList threads = + ActivePS::ProfiledThreads(lockedRegistry, aLock); + + const uint32_t threadCount = uint32_t(threads.length()); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + SLOW_DOWN_FOR_TESTING(); + + // Prepare the streaming context for each thread. + ProcessStreamingContext processStreamingContext( + threadCount, aWriter.SourceFailureLatch(), CorePS::ProcessStartTime(), + aSinceTime); + for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo( + 20_pc, threadCount, "Preparing thread streaming contexts...")) { + ActivePS::ProfiledThreadListElement& thread = threads[i]; + MOZ_RELEASE_ASSERT(thread.mProfiledThreadData); + processStreamingContext.AddThreadStreamingContext( + *thread.mProfiledThreadData, buffer, thread.mJSContext, aService, + std::move(progressLogger)); + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + } + + SLOW_DOWN_FOR_TESTING(); + + // Read the buffer once, and extract all samples and markers that the + // context expects. + buffer.StreamSamplesAndMarkersToJSON( + processStreamingContext, aProgressLogger.CreateSubLoggerTo( + "Processing samples and markers...", 80_pc, + "Processed samples and markers")); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + SLOW_DOWN_FOR_TESTING(); + + // Stream each thread from the pre-filled context. 
+ ThreadStreamingContext* const contextListBegin = + processStreamingContext.begin(); + MOZ_ASSERT(uint32_t(processStreamingContext.end() - contextListBegin) == + threadCount); + for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo( + 92_pc, threadCount, "Streaming threads...")) { + ThreadStreamingContext& threadStreamingContext = contextListBegin[i]; + threadStreamingContext.FinalizeWriter(); + threadStreamingContext.mProfiledThreadData.StreamJSON( + std::move(threadStreamingContext), aWriter, + CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock), + CorePS::ProcessStartTime(), aService, std::move(progressLogger)); + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + } + aProgressLogger.SetLocalProgress(92_pc, "Wrote samples and markers"); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(aLock)) { + for (java::GeckoJavaSampler::ThreadInfo::LocalRef& threadInfo : + javaThreads) { + ProfiledThreadData threadData(ThreadRegistrationInfo{ + threadInfo->GetName()->ToCString().BeginReading(), + ProfilerThreadId::FromNumber(threadInfo->GetId()), false, + CorePS::ProcessStartTime()}); + + threadData.StreamJSON( + javaBuffer, nullptr, aWriter, CorePS::ProcessName(aLock), + CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime, + nullptr, + aProgressLogger.CreateSubLoggerTo("Streaming Java thread...", 96_pc, + "Streamed Java thread")); + } + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + } else { + aProgressLogger.SetLocalProgress(96_pc, "No Java thread"); + } +#endif + + UniquePtr baseProfileThreads = + ActivePS::MoveBaseProfileThreads(aLock); + if (baseProfileThreads) { + aWriter.Splice(MakeStringSpan(baseProfileThreads.get())); + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + aProgressLogger.SetLocalProgress(97_pc, "Wrote baseprofiler data"); + } else { + aProgressLogger.SetLocalProgress(97_pc, "No baseprofiler data"); + } + } + 
aWriter.EndArray(); + + SLOW_DOWN_FOR_TESTING(); + + aWriter.StartArrayProperty("pausedRanges"); + { + buffer.StreamPausedRangesToJSON( + aWriter, aSinceTime, + aProgressLogger.CreateSubLoggerTo("Streaming pauses...", 99_pc, + "Streamed pauses")); + } + aWriter.EndArray(); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + + ProfilingLog::Access([&](Json::Value& aProfilingLogObject) { + aProfilingLogObject[Json::StaticString{ + "profilingLogEnd" TIMESTAMP_JSON_SUFFIX}] = ProfilingLog::Timestamp(); + + aWriter.StartObjectProperty("profilingLog"); + { + nsAutoCString pid; + pid.AppendInt(int64_t(profiler_current_process_id().ToNumber())); + Json::String logString = ToCompactString(aProfilingLogObject); + aWriter.SplicedJSONProperty(pid, logString); + } + aWriter.EndObject(); + }); + + const double collectionEndMs = profiler_time(); + + // Record timestamps for the collection into the buffer, so that consumers + // know why we didn't collect any samples for its duration. + // We put these entries into the buffer after we've collected the profile, + // so they'll be visible for the *next* profile collection (if they haven't + // been overwritten due to buffer wraparound by then). + buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs)); + buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs)); + +#ifdef DEBUG + if (slowWithSleeps != 0) { + LOG("locked_profiler_stream_json_for_this_process done"); + } +#endif // DEBUG + + return ProfileGenerationAdditionalInformation{std::move(sharedLibraryInfo)}; +} + +// Keep this internal function non-static, so it may be used by tests. 
+ProfilerResult +do_profiler_stream_json_for_this_process( + SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + LOG("profiler_stream_json_for_this_process"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + const auto preRecordedMetaInformation = PreRecordMetaInformation(); + + aProgressLogger.SetLocalProgress(2_pc, "PreRecordMetaInformation done"); + + if (profiler_is_active()) { + invoke_profiler_state_change_callbacks(ProfilingState::GeneratingProfile); + } + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return Err(ProfilerError::IsInactive); + } + + ProfileGenerationAdditionalInformation additionalInfo; + MOZ_TRY_VAR( + additionalInfo, + locked_profiler_stream_json_for_this_process( + lock, aWriter, aSinceTime, preRecordedMetaInformation, + aIsShuttingDown, aService, + aProgressLogger.CreateSubLoggerFromTo( + 3_pc, "locked_profiler_stream_json_for_this_process started", + 100_pc, "locked_profiler_stream_json_for_this_process done"))); + + if (aWriter.Failed()) { + return Err(ProfilerError::JsonGenerationFailed); + } + return additionalInfo; +} + +ProfilerResult +profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter, + double aSinceTime, bool aIsShuttingDown, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + MOZ_RELEASE_ASSERT( + !XRE_IsParentProcess() || NS_IsMainThread(), + "In the parent process, profiles should only be generated from the main " + "thread, otherwise they will be incomplete."); + + ProfileGenerationAdditionalInformation additionalInfo; + MOZ_TRY_VAR(additionalInfo, do_profiler_stream_json_for_this_process( + aWriter, aSinceTime, aIsShuttingDown, + aService, std::move(aProgressLogger))); + + return additionalInfo; +} + +// END saving/streaming code +//////////////////////////////////////////////////////////////////////// + +static char FeatureCategory(uint32_t aFeature) { + if 
(aFeature & DefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'D'; + } + return 'd'; + } + + if (aFeature & StartupExtraDefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'S'; + } + return 's'; + } + + if (aFeature & AvailableFeatures()) { + return '-'; + } + return 'x'; +} + +static void PrintUsage() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + printf( + "\n" + "Profiler environment variable usage:\n" + "\n" + " MOZ_PROFILER_HELP\n" + " If set to any value, prints this message.\n" + " Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n" + "\n" + " MOZ_LOG\n" + " Enables logging. The levels of logging available are\n" + " 'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n" + "\n" + " MOZ_PROFILER_STARTUP\n" + " If set to any value other than '' or '0'/'N'/'n', starts the\n" + " profiler immediately on start-up.\n" + " Useful if you want profile code that runs very early.\n" + "\n" + " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n" + " process in the profiler's circular buffer when the profiler is first\n" + " started.\n" + " If unset, the platform default is used:\n" + " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n" + " (%u bytes per entry -> %u or %u total bytes per process)\n" + " Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n" + "\n" + " MOZ_PROFILER_STARTUP_DURATION=<1..>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n" + " entries in the the profiler's circular buffer when the profiler is\n" + " first started, in seconds.\n" + " If unset, the life time of the entries will only be restricted by\n" + " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n" + " additional time duration restriction will be applied.\n" + "\n" + " MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n" + " measured in milliseconds, when the 
profiler is first started.\n" + " If unset, the platform default is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n" + " the integer value of the features bitfield.\n" + " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES=\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n" + " a comma-separated list of strings.\n" + " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n" + " If unset, the platform default is used.\n" + "\n" + " Features: (x=unavailable, D/d=default/unavailable,\n" + " S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n", + unsigned(ActivePS::scMinimumBufferEntries), + unsigned(ActivePS::scMaximumBufferEntries), + unsigned(PROFILER_DEFAULT_ENTRIES.Value()), + unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()), + unsigned(scBytesPerEntry), + unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry), + unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry), + PROFILER_MAX_INTERVAL); + +#define PRINT_FEATURE(n_, str_, Name_, desc_) \ + printf(" %c %7u: \"%s\" (%s)\n", FeatureCategory(ProfilerFeature::Name_), \ + ProfilerFeature::Name_, str_, desc_); + + PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE) + +#undef PRINT_FEATURE + + printf( + " - \"default\" (All above D+S defaults)\n" + "\n" + " MOZ_PROFILER_STARTUP_FILTERS=\n" + " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n" + " comma-separated list of strings. A given thread will be sampled if\n" + " any of the filters is a case-insensitive substring of the thread\n" + " name. 
If unset, a default is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=\n" + " This variable is used to propagate the activeTabID of\n" + " the profiler init params to subprocesses.\n" + "\n" + " MOZ_PROFILER_SHUTDOWN=\n" + " If set, the profiler saves a profile to the named file on shutdown.\n" + " If the Filename contains \"%%p\", this will be replaced with the'\n" + " process id of the parent process.\n" + "\n" + " MOZ_PROFILER_SYMBOLICATE\n" + " If set, the profiler will pre-symbolicate profiles.\n" + " *Note* This will add a significant pause when gathering data, and\n" + " is intended mainly for local development.\n" + "\n" + " MOZ_PROFILER_LUL_TEST\n" + " If set to any value, runs LUL unit tests at startup.\n" + "\n" + " This platform %s native unwinding.\n" + "\n", +#if defined(HAVE_NATIVE_UNWIND) + "supports" +#else + "does not support" +#endif + ); +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler + +#if defined(GP_OS_linux) || defined(GP_OS_android) +struct SigHandlerCoordinator; +#endif + +// Sampler performs setup and teardown of the state required to sample with the +// profiler. Sampler may exist when ActivePS is not present. +// +// SuspendAndSampleAndResumeThread must only be called from a single thread, +// and must not sample the thread it is being called from. A separate Sampler +// instance must be used for each thread which wants to capture samples. + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// With the exception of SamplerThread, all Sampler objects must be Disable-d +// before releasing the lock which was used to create them. This avoids races +// on linux with the SIGPROF signal handler. + +class Sampler { + public: + // Sets up the profiler such that it can begin sampling. + explicit Sampler(PSLockRef aLock); + + // Disable the sampler, restoring it to its previous state. This must be + // called once, and only once, before the Sampler is destroyed. 
+ void Disable(PSLockRef aLock); + + // This method suspends and resumes the samplee thread. It calls the passed-in + // function-like object aProcessRegs (passing it a populated |const + // Registers&| arg) while the samplee thread is suspended. Note that + // the aProcessRegs function must be very careful not to do anything that + // requires a lock, since we may have interrupted the thread at any point. + // As an example, you can't call TimeStamp::Now() since on windows it + // takes a lock on the performance counter. + // + // Func must be a function-like object of type `void()`. + template + void SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs); + + private: +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // Used to restore the SIGPROF handler when ours is removed. + struct sigaction mOldSigprofHandler; + + // This process' ID. Needed as an argument for tgkill in + // SuspendAndSampleAndResumeThread. + ProfilerProcessId mMyPid; + + // The sampler thread's ID. Used to assert that it is not sampling itself, + // which would lead to deadlock. + ProfilerThreadId mSamplerTid; + + public: + // This is the one-and-only variable used to communicate between the sampler + // thread and the samplee thread's signal handler. It's static because the + // samplee thread's signal handler is static. + static struct SigHandlerCoordinator* sSigHandlerCoordinator; +#endif +}; + +// END Sampler +//////////////////////////////////////////////////////////////////////// + +// Platform-specific function that retrieves per-thread CPU measurements. 
+static RunningTimes GetThreadRunningTimesDiff( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData); +// Platform-specific function that *may* discard CPU measurements since the +// previous call to GetThreadRunningTimesDiff, if the way to suspend threads on +// this platform may add running times to that thread. +// No-op otherwise, if suspending a thread doesn't make it work. +static void DiscardSuspendedThreadRunningTimes( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData); + +// Platform-specific function that retrieves process CPU measurements. +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated); + +// Template function to be used by `GetThreadRunningTimesDiff()` (unless some +// platform has a better way to achieve this). +// It helps perform CPU measurements and tie them to a timestamp, such that the +// measurements and timestamp are very close together. +// This is necessary, because the relative CPU usage is computed by dividing +// consecutive CPU measurements by their timestamp difference; if there was an +// unexpected big gap, it could skew this computation and produce impossible +// spikes that would hide the rest of the data. See bug 1685938 for more info. +// Note that this may call the measurement function more than once; it is +// assumed to normally be fast. +// This was verified experimentally, but there is currently no regression +// testing for it; see follow-up bug 1687402. +template +RunningTimes GetRunningTimesWithTightTimestamp( + GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) { + // Once per process, compute a threshold over which running times and their + // timestamp are considered too far apart. + static const TimeDuration scMaxRunningTimesReadDuration = [&]() { + // Run the main CPU measurements + timestamp a number of times and capture + // their durations.
+ constexpr int loops = 128; + TimeDuration durations[loops]; + RunningTimes runningTimes; + TimeStamp before = TimeStamp::Now(); + for (int i = 0; i < loops; ++i) { + AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration); + aGetCPURunningTimesFunction(runningTimes); + const TimeStamp after = TimeStamp::Now(); + durations[i] = after - before; + before = after; + } + // Move median duration to the middle. + std::nth_element(&durations[0], &durations[loops / 2], &durations[loops]); + // Use median*8 as cut-off point. + // Typical durations should be around a microsecond, the cut-off should then + // be around 10 microseconds, well below the expected minimum inter-sample + // interval (observed as a few milliseconds), so overall this should keep + // cpu/interval spikes small. + return durations[loops / 2] * 8; + }(); + + // Record CPU measurements between two timestamps. + RunningTimes runningTimes; + TimeStamp before = TimeStamp::Now(); + aGetCPURunningTimesFunction(runningTimes); + TimeStamp after = TimeStamp::Now(); + const TimeDuration duration = after - before; + + // In most cases, the above should be quick enough. But if not (e.g., because + // of an OS context switch), repeat once: + if (MOZ_UNLIKELY(duration > scMaxRunningTimesReadDuration)) { + AUTO_PROFILER_STATS(GetRunningTimes_REDO); + RunningTimes runningTimes2; + aGetCPURunningTimesFunction(runningTimes2); + TimeStamp after2 = TimeStamp::Now(); + const TimeDuration duration2 = after2 - after; + if (duration2 < duration) { + // We did it faster, use the new results. (But it could still be slower + // than expected, see note below for why it's acceptable.) + // This must stay *after* the CPU measurements. + runningTimes2.SetPostMeasurementTimeStamp(after2); + return runningTimes2; + } + // Otherwise use the initial results, they were slow, but faster than the + // second attempt. + // This means that something bad happened twice in a row on the same thread!
+ // So trying more times would be unlikely to get much better, and would be + // more expensive than the precision is worth. + // At worst, it means that a spike of activity may be reported in the next + // time slice. But in the end, the cumulative work is conserved, so it + // should still be visible at about the correct time in the graph. + AUTO_PROFILER_STATS(GetRunningTimes_RedoWasWorse); + } + + // This must stay *after* the CPU measurements. + runningTimes.SetPostMeasurementTimeStamp(after); + + return runningTimes; +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread + +// The sampler thread controls sampling and runs whenever the profiler is +// active. It periodically runs through all registered threads, finds those +// that should be sampled, then pauses and samples them. + +class SamplerThread { + public: + // Creates a sampler thread, but doesn't start it. + SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures); + ~SamplerThread(); + + // This runs on (is!) the sampler thread. + void Run(); + +#if defined(GP_OS_windows) + // This runs on (is!) the thread to spy on unregistered threads. + void RunUnregisteredThreadSpy(); +#endif + + // This runs on the main thread. + void Stop(PSLockRef aLock); + + void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) { + // We are under lock, so it's safe to just modify the list pointer. + // Also this means the sampler has not started its run yet, so any callback + // added now will be invoked at the end of the next loop; this guarantees + // that the callback will be invoked after at least one full sampling loop. + mPostSamplingCallbackList = MakeUnique( + std::move(mPostSamplingCallbackList), std::move(aCallback)); + } + + private: + void SpyOnUnregisteredThreads(); + + // Item containing a post-sampling callback, and a tail-list of more items. 
+ // Using a linked list means no need to move items when adding more, and + // "stealing" the whole list is one pointer move. + struct PostSamplingCallbackListItem { + UniquePtr mPrev; + PostSamplingCallback mCallback; + + PostSamplingCallbackListItem(UniquePtr aPrev, + PostSamplingCallback&& aCallback) + : mPrev(std::move(aPrev)), mCallback(std::move(aCallback)) {} + }; + + // Moves the whole pending callback list out to the caller. Takes a + // PSLockRef, so the caller is expected to hold the profiler lock. + [[nodiscard]] UniquePtr + TakePostSamplingCallbacks(PSLockRef) { + return std::move(mPostSamplingCallbackList); + } + + static void InvokePostSamplingCallbacks( + UniquePtr aCallbacks, + SamplingState aSamplingState) { + if (!aCallbacks) { + return; + } + // We want to drill down to the last element in this list, which is the + // oldest one, so that we invoke them in FIFO order. + // We don't expect many callbacks, so it's safe to recurse. Note that we're + // moving-from the UniquePtr, so the tail will implicitly get destroyed. + InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState); + // We are going to destroy this item, so we can safely move-from the + // callback before calling it (in case it has an rvalue-ref-qualified call + // operator). + std::move(aCallbacks->mCallback)(aSamplingState); + // It may be tempting for a future maintainer to change aCallbacks into an + // rvalue reference; this will remind them not to do that! + static_assert( + std::is_same_v>, + "We need to capture the list by-value, to implicitly destroy it"); + } + + // This suspends the calling thread for the given number of microseconds. + // Best effort timing. + void SleepMicro(uint32_t aMicroseconds); + + // The sampler used to suspend and sample threads. + Sampler mSampler; + + // The activity generation, for detecting when the sampler thread must stop. + const uint32_t mActivityGeneration; + + // The interval between samples, measured in microseconds. + const int mIntervalMicroseconds; + + // The OS-specific handle for the sampler thread. 
+#if defined(GP_OS_windows) + HANDLE mThread; + HANDLE mUnregisteredThreadSpyThread = nullptr; + enum class SpyingState { + NoSpying, + Spy_Initializing, + // Spy is waiting for SamplerToSpy_Start or MainToSpy_Shutdown. + Spy_Waiting, + // Sampler requests spy to start working. May be pre-empted by + // MainToSpy_Shutdown. + SamplerToSpy_Start, + // Spy is currently working, cannot be interrupted, only the spy is allowed + // to change the state again. + Spy_Working, + // Main control requests spy to shut down. + MainToSpy_Shutdown, + // Spy notified main control that it's out of the loop, about to exit. + SpyToMain_ShuttingDown + }; + SpyingState mSpyingState = SpyingState::NoSpying; + // The sampler will increment this while the spy is working, then while the + // spy is waiting the sampler will decrement it until <=0 before starting the + // spy. This will ensure that the work doesn't take more than 50% of a CPU + // core. + int mDelaySpyStart = 0; + Monitor mSpyingStateMonitor MOZ_UNANNOTATED{ + "SamplerThread::mSpyingStateMonitor"}; +#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \ + defined(GP_OS_android) || defined(GP_OS_freebsd) + pthread_t mThread; +#endif + + // Post-sampling callbacks are kept in a simple linked list, which will be + // stolen by the sampler thread at the end of its next run. + UniquePtr mPostSamplingCallbackList; + +#if defined(GP_OS_windows) + bool mNoTimerResolutionChange = true; +#endif + + // Record for one unregistered thread being spied on: its thread id, its + // name, and a CPU time value in nanoseconds (per the member name). + struct SpiedThread { + base::ProcessId mThreadId; + nsCString mName; + uint64_t mCPUTimeNs; + + SpiedThread(base::ProcessId aThreadId, const nsACString& aName, + uint64_t aCPUTimeNs) + : mThreadId(aThreadId), mName(aName), mCPUTimeNs(aCPUTimeNs) {} + + // Comparisons with just a thread id, for easy searching in an array. 
+ friend bool operator==(const SpiedThread& aSpiedThread, + base::ProcessId aThreadId) { + return aSpiedThread.mThreadId == aThreadId; + } + friend bool operator==(base::ProcessId aThreadId, + const SpiedThread& aSpiedThread) { + return aThreadId == aSpiedThread.mThreadId; + } + }; + + // Time at which mSpiedThreads was previously updated. Null before 1st update. + TimeStamp mLastSpying; + // Unregistered threads that have been found, and are being spied on. + using SpiedThreads = AutoTArray; + SpiedThreads mSpiedThreads; + + SamplerThread(const SamplerThread&) = delete; + void operator=(const SamplerThread&) = delete; +}; + +// [[nodiscard]] static +// Forwards the callback to the sampler thread's callback list. Returns false +// (without taking the callback) when there is no active profiler instance or +// no sampler thread; returns true otherwise. +bool ActivePS::AppendPostSamplingCallback(PSLockRef aLock, + PostSamplingCallback&& aCallback) { + if (!sInstance || !sInstance->mSamplerThread) { + return false; + } + sInstance->mSamplerThread->AppendPostSamplingCallback(aLock, + std::move(aCallback)); + return true; +} + +// This function is required because we need to create a SamplerThread within +// ActivePS's constructor, but SamplerThread is defined after ActivePS. It +// could probably be removed by moving some code around. +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval, uint32_t aFeatures) { + return new SamplerThread(aLock, aGeneration, aInterval, aFeatures); +} + +// This function is the sampler thread. This implementation is used for all +// targets. +void SamplerThread::Run() { + PR_SetCurrentThreadName("SamplerThread"); + + // Features won't change during this SamplerThread's lifetime, so we can read + // them once and store them locally. + const uint32_t features = []() -> uint32_t { + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + // If there is no active profiler, it doesn't matter what we return, + // because this thread will exit before any feature is used. + return 0; + } + return ActivePS::Features(lock); + }(); + + // Not *no*-stack-sampling means we do want stack sampling. 
+ const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features); + + const bool cpuUtilization = ProfilerFeature::HasCPUUtilization(features); + + // Use local ProfileBuffer and underlying buffer to capture the stack. + // (This is to avoid touching the core buffer lock while a thread is + // suspended, because that thread could be working with the core buffer as + // well.) + mozilla::ProfileBufferChunkManagerSingle localChunkManager( + ProfileBufferChunkManager::scExpectedMaximumStackSize); + ProfileChunkedBuffer localBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager); + ProfileBuffer localProfileBuffer(localBuffer); + + // Will be kept between collections, to know what each collection does. + auto previousState = localBuffer.GetState(); + + // This will be filled at every loop, to be used by the next loop to compute + // the CPU utilization between samples. + RunningTimes processRunningTimes; + + // This will be set inside the loop, from inside the lock scope, to capture + // all callbacks added before that, but none after the lock is released. + UniquePtr postSamplingCallbacks; + // This will be set inside the loop, before invoking callbacks outside. + SamplingState samplingState{}; + + const TimeDuration sampleInterval = + TimeDuration::FromMicroseconds(mIntervalMicroseconds); + const uint32_t minimumIntervalSleepUs = + static_cast(mIntervalMicroseconds / 4); + + // This is the scheduled time at which each sampling loop should start. + // It will determine the ideal next sampling start by adding the expected + // interval, except when sampling runs late -- See end of while() loop. + TimeStamp scheduledSampleStart = TimeStamp::Now(); + + while (true) { + const TimeStamp sampleStart = TimeStamp::Now(); + + // This scope is for |lock|. It ends before we sleep below. + { + // There should be no local callbacks left from a previous loop. 
+ MOZ_ASSERT(!postSamplingCallbacks); + + PSAutoLock lock; + TimeStamp lockAcquired = TimeStamp::Now(); + + // Move all the post-sampling callbacks locally, so that new ones cannot + // sneak in between the end of the lock scope and the invocation after it. + postSamplingCallbacks = TakePostSamplingCallbacks(lock); + + if (!ActivePS::Exists(lock)) { + // Exit the `while` loop, including the lock scope, before invoking + // callbacks and returning. + samplingState = SamplingState::JustStopped; + break; + } + + // At this point profiler_stop() might have been called, and + // profiler_start() might have been called on another thread. If this + // happens the generation won't match. + if (ActivePS::Generation(lock) != mActivityGeneration) { + samplingState = SamplingState::JustStopped; + // Exit the `while` loop, including the lock scope, before invoking + // callbacks and returning. + break; + } + + ActivePS::ClearExpiredExitProfiles(lock); + + TimeStamp expiredMarkersCleaned = TimeStamp::Now(); + + if (int(gSkipSampling) <= 0 && !ActivePS::IsSamplingPaused(lock)) { + double sampleStartDeltaMs = + (sampleStart - CorePS::ProcessStartTime()).ToMilliseconds(); + ProfileBuffer& buffer = ActivePS::Buffer(lock); + + // Before sampling counters, update the process CPU counter if active. 
+ if (ActivePS::ProcessCPUCounter* processCPUCounter = + ActivePS::MaybeProcessCPUCounter(lock); + processCPUCounter) { + RunningTimes processRunningTimesDiff = + GetProcessRunningTimesDiff(lock, processRunningTimes); + Maybe cpu = processRunningTimesDiff.GetJsonThreadCPUDelta(); + if (cpu) { + processCPUCounter->Add(static_cast(*cpu)); + } + } + + if (PowerCounters* powerCounters = ActivePS::MaybePowerCounters(lock); + powerCounters) { + powerCounters->Sample(); + } + + // Handle per-process generic counters. + const Vector& counters = CorePS::Counters(lock); + for (auto& counter : counters) { + if (auto sample = counter->Sample(); sample.isSampleNew) { + // Create buffer entries for each counter. + buffer.AddEntry(ProfileBufferEntry::CounterId(counter)); + buffer.AddEntry(ProfileBufferEntry::Time(sampleStartDeltaMs)); +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (ActivePS::IsMemoryCounter(counter)) { + // For the memory counter, subtract the size of our buffer to + // avoid giving the misleading impression that the memory use + // keeps on growing when it's just the profiler session that's + // using a larger buffer as it gets longer. + sample.count -= static_cast( + ActivePS::ControlledChunkManager(lock).TotalSize()); + } +#endif + // In the future, we may support keyed counters - for example, + // counters with a key which is a thread ID. For "simple" counters + // we'll just use a key of 0. + buffer.AddEntry(ProfileBufferEntry::CounterKey(0)); + buffer.AddEntry(ProfileBufferEntry::Count(sample.count)); + if (sample.number) { + buffer.AddEntry(ProfileBufferEntry::Number(sample.number)); + } + } + } + TimeStamp countersSampled = TimeStamp::Now(); + + if (stackSampling || cpuUtilization) { + samplingState = SamplingState::SamplingCompleted; + + // Prevent threads from ending (or starting) and allow access to all + // OffThreadRef's. 
+ ThreadRegistry::LockedRegistry lockedRegistry; + + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + ThreadRegistration::UnlockedRWForLockedProfiler& + unlockedThreadData = + offThreadRef.UnlockedRWForLockedProfilerRef(); + ProfiledThreadData* profiledThreadData = + unlockedThreadData.GetProfiledThreadData(lock); + if (!profiledThreadData) { + // This thread is not being profiled, continue with the next one. + continue; + } + + const ThreadProfilingFeatures whatToProfile = + unlockedThreadData.ProfilingFeatures(); + const bool threadCPUUtilization = + cpuUtilization && + DoFeaturesIntersect(whatToProfile, + ThreadProfilingFeatures::CPUUtilization); + const bool threadStackSampling = + stackSampling && + DoFeaturesIntersect(whatToProfile, + ThreadProfilingFeatures::Sampling); + if (!threadCPUUtilization && !threadStackSampling) { + // Nothing to profile on this thread, continue with the next one. + continue; + } + + const ProfilerThreadId threadId = + unlockedThreadData.Info().ThreadId(); + + const RunningTimes runningTimesDiff = [&]() { + if (!threadCPUUtilization) { + // If we don't need CPU measurements, we only need a timestamp. + return RunningTimes(TimeStamp::Now()); + } + return GetThreadRunningTimesDiff(lock, unlockedThreadData); + }(); + + const TimeStamp& now = runningTimesDiff.PostMeasurementTimeStamp(); + double threadSampleDeltaMs = + (now - CorePS::ProcessStartTime()).ToMilliseconds(); + + // If the thread is asleep and has been sampled before in the same + // sleep episode, or otherwise(*) if there was zero CPU activity + // since the previous sampling, find and copy the previous sample, + // as that's cheaper than taking a new sample. 
+ // (*) Tech note: The asleep check is done first and always, because + // it is more reliable, and knows if it's the first asleep + // sample, which cannot be duplicated; if the test was the other + // way around, it could find zero CPU and then short-circuit + // that state-changing second-asleep-check operation, which + // could result in an unneeded sample. + // However we're using current running times (instead of copying the + // old ones) because some work could have happened. + if (threadStackSampling && + (unlockedThreadData.CanDuplicateLastSampleDueToSleep() || + runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0)))) { + const bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample( + threadId, threadSampleDeltaMs, + profiledThreadData->LastSample(), runningTimesDiff); + if (dup_ok) { + continue; + } + } + + AUTO_PROFILER_STATS(gecko_SamplerThread_Run_DoPeriodicSample); + + // Record the global profiler buffer's range start now, before + // adding the first entry for this thread's sample. + const uint64_t bufferRangeStart = buffer.BufferRangeStart(); + + // Add the thread ID now, so we know its position in the main + // buffer, which is used by some JS data. + // (DoPeriodicSample only knows about the temporary local buffer.) + const uint64_t samplePos = buffer.AddThreadIdEntry(threadId); + profiledThreadData->LastSample() = Some(samplePos); + + // Also add the time, so it's always there after the thread ID, as + // expected by the parser. (Other stack data is optional.) + buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack( + threadSampleDeltaMs)); + + Maybe unresponsiveDuration_ms; + + // If we have RunningTimes data, store it before the CompactStack. + // Note: It is not stored inside the CompactStack so that it doesn't + // get incorrectly duplicated when the thread is sleeping. 
+ if (!runningTimesDiff.IsEmpty()) { + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff); + } + + if (threadStackSampling) { + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock + lockedThreadData = offThreadRef.GetLockedRWFromAnyThread(); + // Suspend the thread and collect its stack data in the local + // buffer. + mSampler.SuspendAndSampleAndResumeThread( + lock, lockedThreadData.DataCRef(), now, + [&](const Registers& aRegs, const TimeStamp& aNow) { + DoPeriodicSample(lock, lockedThreadData.DataCRef(), aRegs, + samplePos, bufferRangeStart, + localProfileBuffer); + + // For "eventDelay", we want the input delay - but if + // there are no events in the input queue (or even if there + // are), we're interested in how long the delay *would* be + // for an input event now, which would be the time to finish + // the current event + the delay caused by any events + // already in the input queue (plus any High priority + // events). Events at lower priorities (in a + // PrioritizedEventQueue) than Input count for input delay + // only for the duration that they're running, since when + // they finish, any queued input event would run. + // + // Unless we record the time state of all events and queue + // states at all times, this is hard to precisely calculate, + // but we can approximate it well in post-processing with + // RunningEventDelay and RunningEventStart. + // + // RunningEventDelay is the time duration the event was + // queued before starting execution. RunningEventStart is + // the time the event started. (Note: since we care about + // Input event delays on MainThread, for + // PrioritizedEventQueues we return 0 for RunningEventDelay + // if the currently running event has a lower priority than + // Input (since Input events won't queue behind them).) 
+ // + // To directly measure this we would need to record the time + // at which the newest event currently in each queue at time + // X (the sample time) finishes running. This of course + // would require looking into the future, or recording all + // this state and then post-processing it later. If we were + // to trace every event start and end we could do this, but + // it would have significant overhead to do so (and buffer + // usage). From a recording of RunningEventDelays and + // RunningEventStarts we can infer the actual delay: + // + // clang-format off + // Event queue: D : C : B : A + // Time inserted (ms): 40 : 20 : 10 : 0 + // Run Time (ms): 30 : 100 : 40 : 30 + // + // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 + // [A||||||||||||] + // ----------[B|||||||||||||||||] + // -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||] + // -----------------------------------------------------------------[D|||||||||...] + // + // Calculate the delay of a new event added at time t: (run every sample) + // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart); + // effective_submission = now - TimeSinceRunningEventBlockedInputEvents; + // delta = (now - last_sample_time); + // last_sample_time = now; + // for (t=effective_submission to now) { + // delay[t] += delta; + // } + // + // Can be reduced in overhead by: + // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart); + // effective_submission = now - TimeSinceRunningEventBlockedInputEvents; + // if (effective_submission != last_submission) { + // delta = (now - last_submission); + // // this loop should be made to match each sample point in the range + // // instead of assuming 1ms sampling as this pseudocode does + // for (t=last_submission to effective_submission-1) { + // delay[t] += delta; + // delta -= 1; // assumes 1ms; adjust as needed to match for() + // } + // last_submission = 
effective_submission; + // } + // + // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final + // hypothetical Submission Running @ result + // event E + // 0 Empty A 0 30 0 0 @0=10 30 + // 10 B A 0 60 0 0 @0=20, @10=10 60 + // 20 B A 0 150 0 0 @0=30, @10=20, @20=10 150 + // 30 C B 20 140 10 30 @10=20, @20=10, @30=0 140 + // 40 C B 20 160 @10=30, @20=20... 160 + // 50 C B 20 150 150 + // 60 C B 20 140 @10=50, @20=40... 140 + // 70 D C 50 130 20 70 @20=50, @30=40... 130 + // ... + // 160 D C 50 40 @20=140, @30=130... 40 + // 170 D 140 30 40 @40=140, @50=130... (rounding) 30 + // 180 D 140 20 40 @40=150 20 + // 190 D 140 10 40 @40=160 10 + // 200 0 0 NA 0 + // + // Function Delay(t) = the time between t and the time at which a hypothetical + // event e would start executing, if e was enqueued at time t. + // + // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running + // // instantly. + // Delay(0) = 30 // The hypothetical event e got enqueued just after A got + // // enqueued. It can start running at 30, when A is done. + // Delay(5) = 25 + // Delay(10) = 60 // Can start running at 70, after both A and B are done. + // Delay(19) = 51 + // Delay(20) = 150 // Can start running at 170, after A, B & C. + // Delay(25) = 145 + // Delay(30) = 170 // Can start running at 200, after A, B, C & D. + // Delay(120) = 80 + // Delay(200) = 0 // (assuming nothing was enqueued after D) + // + // For every event that gets enqueued, the Delay time will go up by the + // event's running time at the time at which the event is enqueued. + // The Delay function will be a sawtooth of the following shape: + // + // |\ |... 
+ // | \ | + // |\ | \ | + // | \ | \ | + // |\ | \ | \ | + // |\ | \| \| \ | + // | \| \ | + // _| \____| + // + // + // A more complex example with a PrioritizedEventQueue: + // + // Event queue: D : C : B : A + // Time inserted (ms): 40 : 20 : 10 : 0 + // Run Time (ms): 30 : 100 : 40 : 30 + // Priority: Input: Norm: Norm: Norm + // + // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 + // [A||||||||||||] + // ----------[B|||||||||||||||||] + // ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||] + // ---------------[D||||||||||||] + // + // + // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final + // hypothetical Submission Running @ result + // event + // 0 Empty A 0 30 0 0 @0=10 30 + // 10 B A 0 20 0 0 @0=20, @10=10 20 + // 20 B A 0 10 0 0 @0=30, @10=20, @20=10 10 + // 30 C B 0 40 30 30 @30=10 40 + // 40 C B 0 60 30 @40=10, @30=20 60 + // 50 C B 0 50 30 @50=10, @40=20, @30=30 50 + // 60 C B 0 40 30 @60=10, @50=20, @40=30, @30=40 40 + // 70 C D 30 30 40 70 @60=20, @50=30, @40=40 30 + // 80 C D 30 20 40 70 ...@50=40, @40=50 20 + // 90 C D 30 10 40 70 ...@60=40, @50=50, @40=60 10 + // 100 C 0 100 100 100 @100=10 100 + // 110 C 0 90 100 100 @110=10, @100=20 90 + + // + // For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority. + // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running + // // instantly. + // Delay(0) = 30 // The hypothetical input event e got enqueued just after A got + // // enqueued. It can start running at 30, when A is done. + // Delay(5) = 25 + // Delay(10) = 20 + // Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not. + // // So e can start running at 30, when A is done. + // Delay(30) = 40 // Can start running at 70, after B is done. 
+ // Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority) + // Delay(80) = 20 + // Delay(100) = 100 // Wait for C to finish + + // clang-format on + // + // Alternatively we could insert (recycled instead of + // allocated/freed) input events at every sample period + // (1ms...), and use them to back-calculate the delay. This + // might also be somewhat expensive, and would require + // guessing at the maximum delay, which would likely be in + // the seconds, and so you'd need 1000's of pre-allocated + // events per queue per thread - so there would be a memory + // impact as well. + + TimeDuration currentEventDelay; + TimeDuration currentEventRunning; + lockedThreadData->GetRunningEventDelay( + aNow, currentEventDelay, currentEventRunning); + + // Note: eventDelay is a different definition of + // responsiveness than the 16ms event injection. + + // Don't suppress 0's for now; that can be a future + // optimization. We probably want one zero to be stored + // before we start suppressing, which would be more + // complex. + unresponsiveDuration_ms = + Some(currentEventDelay.ToMilliseconds() + + currentEventRunning.ToMilliseconds()); + }); + + if (cpuUtilization) { + // Suspending the thread for sampling could have added some + // running time to it, discard any since the call to + // GetThreadRunningTimesDiff above. + // NOTE(review): this checks the session-wide `cpuUtilization` + // flag, not the per-thread `threadCPUUtilization` computed + // above -- confirm that this is intended. + DiscardSuspendedThreadRunningTimes(lock, unlockedThreadData); + } + + // If we got eventDelay data, store it before the CompactStack. + // Note: It is not stored inside the CompactStack so that it + // doesn't get incorrectly duplicated when the thread is sleeping. + if (unresponsiveDuration_ms.isSome()) { + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::UnresponsiveDurationMs, + *unresponsiveDuration_ms); + } + } + + // There *must* be a CompactStack after a TimeBeforeCompactStack; + // but note that other entries may have been concurrently inserted + // between the TimeBeforeCompactStack above and now. 
If the captured + // sample from `DoPeriodicSample` is complete, copy it into the + // global buffer, otherwise add an empty one to satisfy the parser + // that expects one. + auto state = localBuffer.GetState(); + if (NS_WARN_IF(state.mFailedPutBytes != + previousState.mFailedPutBytes)) { + LOG("Stack sample too big for local storage, failed to store %u " + "bytes", + unsigned(state.mFailedPutBytes - + previousState.mFailedPutBytes)); + // There *must* be a CompactStack after a TimeBeforeCompactStack, + // even an empty one. + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::CompactStack, + UniquePtr(nullptr)); + } else if (state.mRangeEnd - previousState.mRangeEnd >= + *profiler_get_core_buffer().BufferLength()) { + LOG("Stack sample too big for profiler storage, needed %u bytes", + unsigned(state.mRangeEnd - previousState.mRangeEnd)); + // There *must* be a CompactStack after a TimeBeforeCompactStack, + // even an empty one. + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::CompactStack, + UniquePtr(nullptr)); + } else { + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::CompactStack, localBuffer); + } + + // Clean up for the next run. + localBuffer.Clear(); + previousState = localBuffer.GetState(); + } + } else { + samplingState = SamplingState::NoStackSamplingCompleted; + } + +#if defined(USE_LUL_STACKWALK) + // The LUL unwind object accumulates frame statistics. Periodically we + // should poke it to give it a chance to print those statistics. This + // involves doing I/O (fprintf, __android_log_print, etc.) and so + // can't safely be done from the critical section inside + // SuspendAndSampleAndResumeThread, which is why it is done here. 
+ lul::LUL* lul = CorePS::Lul(); + if (lul) { + lul->MaybeShowStats(); + } +#endif + TimeStamp threadsSampled = TimeStamp::Now(); + + { + AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests); + ActivePS::FulfillChunkRequests(lock); + } + + buffer.CollectOverheadStats(sampleStartDeltaMs, + lockAcquired - sampleStart, + expiredMarkersCleaned - lockAcquired, + countersSampled - expiredMarkersCleaned, + threadsSampled - countersSampled); + } else { + samplingState = SamplingState::SamplingPaused; + } + } + // gPSMutex is not held after this point. + + // Invoke end-of-sampling callbacks outside of the locked scope. + InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), + samplingState); + + ProfilerChild::ProcessPendingUpdate(); + + if (ProfilerFeature::HasUnregisteredThreads(features)) { +#if defined(GP_OS_windows) + { + MonitorAutoLock spyingStateLock{mSpyingStateMonitor}; + switch (mSpyingState) { + case SpyingState::SamplerToSpy_Start: + case SpyingState::Spy_Working: + // If the spy is working (or about to work), record this loop + // iteration to delay the next start. + ++mDelaySpyStart; + break; + case SpyingState::Spy_Waiting: + // The Spy is idle, waiting for instructions. Should we delay? + if (--mDelaySpyStart <= 0) { + mDelaySpyStart = 0; + mSpyingState = SpyingState::SamplerToSpy_Start; + mSpyingStateMonitor.NotifyAll(); + } + break; + default: + // Otherwise the spy should be initializing or shutting down. + MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing || + mSpyingState == SpyingState::MainToSpy_Shutdown || + mSpyingState == SpyingState::SpyToMain_ShuttingDown); + break; + } + } +#else + // On non-Windows platforms, this is fast enough to run in this thread, + // each sampling loop. + SpyOnUnregisteredThreads(); +#endif + } + + // We expect the next sampling loop to start `sampleInterval` after this + // loop here was scheduled to start. 
+ scheduledSampleStart += sampleInterval; + + // Try to sleep until we reach that next scheduled time. + const TimeStamp beforeSleep = TimeStamp::Now(); + if (scheduledSampleStart >= beforeSleep) { + // There is still time before the next scheduled sample time. + const uint32_t sleepTimeUs = static_cast( + (scheduledSampleStart - beforeSleep).ToMicroseconds()); + if (sleepTimeUs >= minimumIntervalSleepUs) { + SleepMicro(sleepTimeUs); + } else { + // If we're too close to that time, sleep the minimum amount of time. + // Note that the next scheduled start is not shifted, so at the end of + // the next loop, sleep may again be adjusted to get closer to schedule. + SleepMicro(minimumIntervalSleepUs); + } + } else { + // This sampling loop ended after the next sampling should have started! + // There is little point to try and keep up to schedule now, it would + // require more work, while it's likely we're late because the system is + // already busy. Try and restart a normal schedule from now. + scheduledSampleStart = beforeSleep + sampleInterval; + SleepMicro(static_cast(sampleInterval.ToMicroseconds())); + } + } + + // End of `while` loop. We can only be here from a `break` inside the loop. + InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState); +} + +namespace geckoprofiler::markers { + +// Marker describing one unregistered thread: its id, its name ("~Unnamed~" +// when the given name is empty) and, when non-empty, an "End Event" string. +struct UnregisteredThreadLifetimeMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("UnregisteredThreadLifetime"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + base::ProcessId aThreadId, + const ProfilerString8View& aName, + const ProfilerString8View& aEndEvent) { + aWriter.IntProperty("Thread Id", aThreadId); + aWriter.StringProperty("Thread Name", aName.Length() != 0 + ? 
aName.AsSpan() + : MakeStringSpan("~Unnamed~")); + if (aEndEvent.Length() != 0) { + aWriter.StringProperty("End Event", aEndEvent); + } + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer, + MS::Searchable::Searchable); + schema.AddKeyFormatSearchable("Thread Name", MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyFormat("End Event", MS::Format::String); + schema.AddStaticLabelValue( + "Note", + "Start and end are approximate, based on first and last appearances."); + schema.SetChartLabel( + "{marker.data.Thread Name} (tid {marker.data.Thread Id})"); + schema.SetTableLabel("{marker.name} lifetime"); + return schema; + } +}; + +/* Marker reporting the CPU time (in ns) attributed to a thread id between aStart and aEnd; "CPU Utilization" is that CPU time divided by the interval duration converted to ns. NOTE(review): a zero-length interval (aStart == aEnd) would make this a division by zero -- confirm callers always pass a non-empty interval. */ +struct UnregisteredThreadCPUMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("UnregisteredThreadCPU"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + base::ProcessId aThreadId, + int64_t aCPUDiffNs, const TimeStamp& aStart, + const TimeStamp& aEnd) { + aWriter.IntProperty("Thread Id", aThreadId); + aWriter.IntProperty("CPU Time", aCPUDiffNs); + aWriter.DoubleProperty( + "CPU Utilization", + double(aCPUDiffNs) / ((aEnd - aStart).ToMicroseconds() * 1000.0)); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer, + MS::Searchable::Searchable); + schema.AddKeyFormat("CPU Time", MS::Format::Nanoseconds); + schema.AddKeyFormat("CPU Utilization", MS::Format::Percentage); + schema.SetChartLabel("{marker.data.CPU Utilization}"); + schema.SetTableLabel( + "{marker.name} - Activity: {marker.data.CPU Utilization}"); + return schema; + } +}; + +} // namespace geckoprofiler::markers + +// Returns true if one of the threads found while iterating over a +// ThreadRegistry::LockedRegistry has the given thread id. +static bool IsThreadIdRegistered(ProfilerThreadId aThreadId) { + 
ThreadRegistry::LockedRegistry lockedRegistry; + const auto registryEnd = lockedRegistry.end(); + return std::find_if( + lockedRegistry.begin(), registryEnd, + [aThreadId](const ThreadRegistry::OffThreadRef& aOffThreadRef) { + return aOffThreadRef.UnlockedConstReaderCRef() + .Info() + .ThreadId() == aThreadId; + }) != registryEnd; +} + +/* Builds a marker name of the form "tid N" (N being the thread id), followed by a space and aName when aName is not empty. */ +static nsAutoCString MakeThreadInfoMarkerName(base::ProcessId aThreadId, + const nsACString& aName) { + nsAutoCString markerName{"tid "}; + markerName.AppendInt(int64_t(aThreadId)); + if (!aName.IsEmpty()) { + markerName.AppendLiteral(" "); + markerName.Append(aName); + } + return markerName; +} + +void SamplerThread::SpyOnUnregisteredThreads() { + const TimeStamp unregisteredThreadSearchStart = TimeStamp::Now(); + + const base::ProcessId currentProcessId = + base::ProcessId(profiler_current_process_id().ToNumber()); + nsTArray request(1); + request.EmplaceBack( + /* aPid = */ currentProcessId, + /* aProcessType = */ ProcType::Unknown, + /* aOrigin = */ ""_ns, + /* aWindowInfo = */ nsTArray{}, + /* aUtilityInfo = */ nsTArray{}, + /* aChild = */ 0 +#ifdef XP_MACOSX + , + /* aChildTask = */ MACH_PORT_NULL +#endif // XP_MACOSX + ); + + const ProcInfoPromise::ResolveOrRejectValue procInfoOrError = + GetProcInfoSync(std::move(request)); + + if (!procInfoOrError.IsResolve()) { + PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Could not retrieve any process information"); + return; + } + + const auto& procInfoHashMap = procInfoOrError.ResolveValue(); + // Expecting the requested (current) process information to be present in the + // hashmap. 
+ const auto& procInfoPtr = + procInfoHashMap.readonlyThreadsafeLookup(currentProcessId); + if (!procInfoPtr) { + PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Could not retrieve information about this process"); + return; + } + + // Record the time spent so far, which is OS-bound... + PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Work to discover threads"); + + // ... and record the time needed to process the data, which we can control. + AUTO_PROFILER_MARKER_TEXT( + "Unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread()), + "Work to process discovered threads and record unregistered ones"_ns); + + const Span threads = procInfoPtr->value().threads; + + // mLastSpying timestamp should be null only at the beginning of a session, + // when mSpiedThreads is still empty. + MOZ_ASSERT_IF(mLastSpying.IsNull(), mSpiedThreads.IsEmpty()); + + const TimeStamp previousSpying = std::exchange(mLastSpying, TimeStamp::Now()); + + // Find threads that were spied on but are not present anymore. + const auto threadsBegin = threads.begin(); + const auto threadsEnd = threads.end(); + for (size_t spiedThreadIndexPlus1 = mSpiedThreads.Length(); + spiedThreadIndexPlus1 != 0; --spiedThreadIndexPlus1) { + const SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndexPlus1 - 1]; + if (std::find_if(threadsBegin, threadsEnd, + [spiedTid = spiedThread.mThreadId]( + const mozilla::ThreadInfo& aThreadInfo) { + return aThreadInfo.tid == spiedTid; + }) == threadsEnd) { + // This spied thread is gone. 
+ PROFILER_MARKER( + MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName), + PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + // Place the end between this update and the previous one. + MarkerTiming::IntervalEnd(previousSpying + + (mLastSpying - previousSpying) / + int64_t(2))), + UnregisteredThreadLifetimeMarker, spiedThread.mThreadId, + spiedThread.mName, "Thread disappeared"); + + // Don't spy on it anymore, assuming it won't come back. + mSpiedThreads.RemoveElementAt(spiedThreadIndexPlus1 - 1); + } + } + + for (const mozilla::ThreadInfo& threadInfo : threads) { + // Index of this encountered thread in mSpiedThreads, or NoIndex. + size_t spiedThreadIndex = mSpiedThreads.IndexOf(threadInfo.tid); + if (IsThreadIdRegistered(ProfilerThreadId::FromNumber(threadInfo.tid))) { + // This thread id is already officially registered. + if (spiedThreadIndex != SpiedThreads::NoIndex) { + // This now-registered thread was previously being spied. + SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex]; + PROFILER_MARKER( + MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName), + PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + // Place the end between this update and the previous one. + // TODO: Find the real time from the thread registration? + MarkerTiming::IntervalEnd(previousSpying + + (mLastSpying - previousSpying) / + int64_t(2))), + UnregisteredThreadLifetimeMarker, spiedThread.mThreadId, + spiedThread.mName, "Thread registered itself"); + + // Remove from mSpiedThreads, since it can be profiled normally. + mSpiedThreads.RemoveElement(threadInfo.tid); + } + } else { + // This thread id is not registered. + if (spiedThreadIndex == SpiedThreads::NoIndex) { + // This unregistered thread has not been spied yet, store it now. 
+ NS_ConvertUTF16toUTF8 name(threadInfo.name); + mSpiedThreads.EmplaceBack(threadInfo.tid, name, threadInfo.cpuTime); + + PROFILER_MARKER( + MakeThreadInfoMarkerName(threadInfo.tid, name), PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + // Place the start between this update and the previous one (or + // the start of this search if it's the first one). + MarkerTiming::IntervalStart( + mLastSpying - + (mLastSpying - (previousSpying.IsNull() + ? unregisteredThreadSearchStart + : previousSpying)) / + int64_t(2))), + UnregisteredThreadLifetimeMarker, threadInfo.tid, name, + /* aEndEvent */ ""); + } else { + // This unregistered thread was already being spied, record its work. + SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex]; + int64_t diffCPUTimeNs = + int64_t(threadInfo.cpuTime) - int64_t(spiedThread.mCPUTimeNs); + spiedThread.mCPUTimeNs = threadInfo.cpuTime; + if (diffCPUTimeNs != 0) { + PROFILER_MARKER( + MakeThreadInfoMarkerName(threadInfo.tid, spiedThread.mName), + PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + MarkerTiming::Interval(previousSpying, mLastSpying)), + UnregisteredThreadCPUMarker, threadInfo.tid, diffCPUTimeNs, + previousSpying, mLastSpying); + } + } + } + } + + PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Work to discover and record unregistered threads"); +} + +// We #include these files directly because it means those files can use +// declarations from this file trivially. These provide target-specific +// implementations of all SamplerThread methods except Run(). 
+#if defined(GP_OS_windows) +# include "platform-win32.cpp" +#elif defined(GP_OS_darwin) +# include "platform-macos.cpp" +#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include "platform-linux-android.cpp" +#else +# error "bad platform" +#endif + +// END SamplerThread +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN externally visible functions + +MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf) + +NS_IMETHODIMP +GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport, + nsISupports* aData, bool aAnonymize) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + size_t profSize = 0; + size_t lulSize = 0; + + { + PSAutoLock lock; + + if (CorePS::Exists()) { + CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize); + } + + if (ActivePS::Exists(lock)) { + profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf); + } + } + + MOZ_COLLECT_REPORT( + "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize, + "Memory used by the Gecko Profiler's global state (excluding memory used " + "by LUL)."); + +#if defined(USE_LUL_STACKWALK) + MOZ_COLLECT_REPORT( + "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize, + "Memory used by LUL, a stack unwinder used by the Gecko Profiler."); +#endif + + return NS_OK; +} + +NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter) + +static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) { + if (strcmp(aFeature, "default") == 0) { + return (aIsStartup ? 
(DefaultFeatures() | StartupExtraDefaultFeatures()) + : DefaultFeatures()) & + AvailableFeatures(); + } + +#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \ + if (strcmp(aFeature, str_) == 0) { \ + return ProfilerFeature::Name_; \ + } + + PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT) + +#undef PARSE_FEATURE_BIT + + printf("\nUnrecognized feature \"%s\".\n\n", aFeature); + // Since we may have an old feature we don't implement anymore, don't exit. + PrintUsage(); + return 0; +} + +uint32_t ParseFeaturesFromStringArray(const char** aFeatures, + uint32_t aFeatureCount, + bool aIsStartup /* = false */) { + uint32_t features = 0; + for (size_t i = 0; i < aFeatureCount; i++) { + features |= ParseFeature(aFeatures[i], aIsStartup); + } + return features; +} + +static ProfilingStack* locked_register_thread( + PSLockRef aLock, ThreadRegistry::OffThreadRef aOffThreadRef) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + VTUNE_REGISTER_THREAD(aOffThreadRef.UnlockedConstReaderCRef().Info().Name()); + + if (ActivePS::Exists(aLock)) { + ThreadProfilingFeatures threadProfilingFeatures = + ActivePS::ProfilingFeaturesForThread( + aLock, aOffThreadRef.UnlockedConstReaderCRef().Info()); + if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) { + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock + lockedRWFromAnyThread = aOffThreadRef.GetLockedRWFromAnyThread(); + + ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread( + aLock, MakeUnique( + aOffThreadRef.UnlockedConstReaderCRef().Info())); + lockedRWFromAnyThread->SetProfilingFeaturesAndData( + threadProfilingFeatures, profiledThreadData, aLock); + + if (ActivePS::FeatureJS(aLock)) { + lockedRWFromAnyThread->StartJSSampling(ActivePS::JSFlags(aLock)); + if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread = + lockedRWFromAnyThread.GetLockedRWOnThread(); + lockedRWOnThread) { + // We can manually poll the current thread so it starts sampling + // immediately. 
// Notifies observers of `aTopic`, with optional subject `aSubject`, through
// the observer service.
// When called off the main thread, the notification is not sent directly:
// a runnable is dispatched to the main thread, and that runnable calls this
// function again with the same arguments.
// If no observer service is returned, nothing is sent.
static void NotifyObservers(const char* aTopic,
                            nsISupports* aSubject = nullptr) {
  if (!NS_IsMainThread()) {
    // Dispatch a task to the main thread that notifies observers.
    // If NotifyObservers is called both on and off the main thread within a
    // short time, the order of the notifications can be different from the
    // order of the calls to NotifyObservers.
    // Getting the order 100% right isn't that important at the moment, because
    // these notifications are only observed in the parent process, where the
    // profiler_* functions are currently only called on the main thread.
    // Hold the subject in a local that the lambda copies.
    nsCOMPtr subject = aSubject;
    // The lambda copies the `aTopic` pointer, not the characters it points
    // to, so the string must still be valid when the runnable runs.
    // NOTE(review): the call sites visible nearby pass string literals;
    // confirm that all callers do.
    NS_DispatchToMainThread(NS_NewRunnableFunction(
        "NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
    return;
  }

  if (nsCOMPtr os = services::GetObserverService()) {
    os->NotifyObservers(aSubject, aTopic, nullptr);
  }
}
aDuration); + +// This basically duplicates AutoProfilerLabel's constructor. +static void* MozGlueLabelEnter(const char* aLabel, const char* aDynamicString, + void* aSp) { + ThreadRegistration::OnThreadPtr onThreadPtr = + ThreadRegistration::GetOnThreadPtr(); + if (!onThreadPtr) { + return nullptr; + } + ProfilingStack& profilingStack = + onThreadPtr->UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef(); + profilingStack.pushLabelFrame(aLabel, aDynamicString, aSp, + JS::ProfilingCategoryPair::OTHER); + return &profilingStack; +} + +// This basically duplicates AutoProfilerLabel's destructor. +static void MozGlueLabelExit(void* aProfilingStack) { + if (aProfilingStack) { + reinterpret_cast(aProfilingStack)->pop(); + } +} + +static Vector SplitAtCommas(const char* aString, + UniquePtr& aStorage) { + size_t len = strlen(aString); + aStorage = MakeUnique(len + 1); + PodCopy(aStorage.get(), aString, len + 1); + + // Iterate over all characters in aStorage and split at commas, by + // overwriting commas with the null char. + Vector array; + size_t currentElementStart = 0; + for (size_t i = 0; i <= len; i++) { + if (aStorage[i] == ',') { + aStorage[i] = '\0'; + } + if (aStorage[i] == '\0') { + // Only add non-empty elements, otherwise ParseFeatures would later + // complain about unrecognized features. 
// Returns a pointer to the first character of `str` that is not a decimal
// digit, i.e. the start of the unit suffix in strings such as "100KB".
// The result points into `str`; it points at the terminating null character
// when the string consists only of digits (or is empty).
// `str` must be a valid null-terminated string.
static const char* get_size_suffix(const char* str) {
  const char* ptr = str;

  // isdigit() has undefined behavior for negative arguments other than EOF,
  // and plain `char` may be signed, so convert to unsigned char first. This
  // matters for non-ASCII bytes, which can appear in environment variables.
  while (isdigit(static_cast<unsigned char>(*ptr))) {
    ptr++;
  }

  return ptr;
}
+ { + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + locked_register_thread(lock, offThreadRef); + } + } + + // Platform-specific initialization. + PlatformInit(lock); + +#if defined(GP_OS_android) + if (jni::IsAvailable()) { + GeckoJavaSampler::Init(); + } +#endif + + // (Linux-only) We could create CorePS::mLul and read unwind info into it + // at this point. That would match the lifetime implied by destruction of + // it in profiler_shutdown() just below. However, that gives a big delay on + // startup, even if no profiling is actually to be done. So, instead, it is + // created on demand at the first call to PlatformStart(). + + const char* startupEnv = getenv("MOZ_PROFILER_STARTUP"); + if (!startupEnv || startupEnv[0] == '\0' || + ((startupEnv[0] == '0' || startupEnv[0] == 'N' || + startupEnv[0] == 'n') && + startupEnv[1] == '\0')) { + return; + } + + LOG("- MOZ_PROFILER_STARTUP is set"); + + // Startup default capacity may be different. 
+ capacity = PROFILER_DEFAULT_STARTUP_ENTRIES; + + const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES"); + if (startupCapacity && startupCapacity[0] != '\0') { + errno = 0; + long capacityLong = strtol(startupCapacity, nullptr, 10); + std::string_view sizeSuffix = get_size_suffix(startupCapacity); + + if (sizeSuffix == "KB") { + capacityLong *= 1000 / scBytesPerEntry; + } else if (sizeSuffix == "KiB") { + capacityLong *= 1024 / scBytesPerEntry; + } else if (sizeSuffix == "MB") { + capacityLong *= (1000 * 1000) / scBytesPerEntry; + } else if (sizeSuffix == "MiB") { + capacityLong *= (1024 * 1024) / scBytesPerEntry; + } else if (sizeSuffix == "GB") { + capacityLong *= (1000 * 1000 * 1000) / scBytesPerEntry; + } else if (sizeSuffix == "GiB") { + capacityLong *= (1024 * 1024 * 1024) / scBytesPerEntry; + } else if (!sizeSuffix.empty()) { + LOG("- MOZ_PROFILER_STARTUP_ENTRIES unit must be one of the " + "following: KB, KiB, MB, MiB, GB, GiB"); + PrintUsage(); + exit(1); + } + + // `long` could be 32 or 64 bits, so we force a 64-bit comparison with + // the maximum 32-bit signed number (as more than that is clamped down to + // 2^31 anyway). 
+ if (errno == 0 && capacityLong > 0 && + static_cast(capacityLong) <= + static_cast(INT32_MAX)) { + capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries( + static_cast(capacityLong))); + LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value())); + } else { + LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s", + startupCapacity); + PrintUsage(); + exit(1); + } + } + + const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION"); + if (startupDuration && startupDuration[0] != '\0') { + errno = 0; + double durationVal = PR_strtod(startupDuration, nullptr); + if (errno == 0 && durationVal >= 0.0) { + if (durationVal > 0.0) { + duration = Some(durationVal); + } + LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal); + } else { + LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s", + startupDuration); + PrintUsage(); + exit(1); + } + } + + const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL"); + if (startupInterval && startupInterval[0] != '\0') { + errno = 0; + interval = PR_strtod(startupInterval, nullptr); + if (errno == 0 && interval > 0.0 && interval <= PROFILER_MAX_INTERVAL) { + LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval); + } else { + LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s", + startupInterval); + PrintUsage(); + exit(1); + } + } + + features |= StartupExtraDefaultFeatures() & AvailableFeatures(); + + const char* startupFeaturesBitfield = + getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD"); + if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') { + errno = 0; + features = strtol(startupFeaturesBitfield, nullptr, 10); + if (errno == 0) { + LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features); + } else { + LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s", + startupFeaturesBitfield); + PrintUsage(); + exit(1); + } + } else { + const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES"); + if (startupFeatures) { + // 
Interpret startupFeatures as a list of feature strings, separated by + // commas. + UniquePtr featureStringStorage; + Vector featureStringArray = + SplitAtCommas(startupFeatures, featureStringStorage); + features = ParseFeaturesFromStringArray(featureStringArray.begin(), + featureStringArray.length(), + /* aIsStartup */ true); + LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features); + } + } + + const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS"); + if (startupFilters && startupFilters[0] != '\0') { + filters = SplitAtCommas(startupFilters, filterStorage); + LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters); + + if (mozilla::profiler::detail::FiltersExcludePid(filters)) { + LOG(" -> This process is excluded and won't be profiled"); + return; + } + } + + const char* startupActiveTabID = + getenv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID"); + if (startupActiveTabID && startupActiveTabID[0] != '\0') { + std::istringstream iss(startupActiveTabID); + iss >> activeTabID; + if (!iss.fail()) { + LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID = %" PRIu64, activeTabID); + } else { + LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID not a valid " + "uint64_t: %s", + startupActiveTabID); + PrintUsage(); + exit(1); + } + } + + locked_profiler_start(lock, capacity, interval, features, filters.begin(), + filters.length(), activeTabID, duration); + } + + // The GeckoMain thread registration happened too early to record a marker, + // so let's record it again now. + profiler_mark_thread_awake(); + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // Start counting memory allocations (outside of lock because this may call + // profiler_add_sampled_counter which would attempt to take the lock.) + ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks()); +#endif + + invoke_profiler_state_change_callbacks(ProfilingState::Started); + + // We do this with gPSMutex unlocked. The comment in profiler_stop() explains + // why. 
// Shuts the profiler down. Must be called on the main thread, and CorePS must
// exist.
//
// In order: invokes the state-change callbacks (Stopping first, if the
// profiler is active, then ShuttingDown); pre-records meta information; then,
// under the profiler lock, writes the profile to the file named by the
// MOZ_PROFILER_SHUTDOWN environment variable (only if a session is active and
// the variable is non-empty), stops the active session and destroys CorePS.
//
// When `aIsFastShutdown` is IsFastShutdown::Yes, the function returns right
// after the optional profile save: the session is not stopped, CorePS is not
// destroyed, and the calling thread is not unregistered.
void profiler_shutdown(IsFastShutdown aIsFastShutdown) {
  LOG("profiler_shutdown");

  VTUNE_SHUTDOWN();

  MOZ_RELEASE_ASSERT(NS_IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  if (profiler_is_active()) {
    invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
  }
  invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown);

  // Gathered before the lock is taken; only consumed by the profile save
  // below.
  const auto preRecordedMetaInformation = PreRecordMetaInformation();

  ProfilerParent::ProfilerWillStopIfStarted();

  // If the profiler is active we must get a handle to the SamplerThread before
  // ActivePS is destroyed, in order to delete it.
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Save the profile on shutdown if requested.
    if (ActivePS::Exists(lock)) {
      const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
      if (filename && filename[0] != '\0') {
        locked_profiler_save_profile_to_file(lock, filename,
                                             preRecordedMetaInformation,
                                             /* aIsShuttingDown */ true);
      }
      // Fast shutdown: skip stopping the session and all later teardown.
      if (aIsFastShutdown == IsFastShutdown::Yes) {
        return;
      }

      samplerThread = locked_profiler_stop(lock);
    } else if (aIsFastShutdown == IsFastShutdown::Yes) {
      // Fast shutdown with no active session: nothing to save, skip teardown.
      return;
    }

    CorePS::Destroy(lock);
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    Unused << ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }

  // Reverse the registration done in profiler_init.
  ThreadRegistration::UnregisterThread();
}
aETLDplus1->Data() : ""); + PSAutoLock lock; + CorePS::SetProcessName(lock, aProcessName); + if (aETLDplus1) { + CorePS::SetETLDplus1(lock, *aETLDplus1); + } +} + +UniquePtr profiler_get_profile(double aSinceTime, + bool aIsShuttingDown) { + LOG("profiler_get_profile"); + + UniquePtr service = + profiler_code_address_service_for_presymbolication(); + + FailureLatchSource failureLatch; + SpliceableChunkedJSONWriter b{failureLatch}; + if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, service.get(), + ProgressLogger{})) { + return nullptr; + } + return b.ChunkedWriteFunc().CopyData(); +} + +[[nodiscard]] bool profiler_get_profile_json( + SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter, + double aSinceTime, bool aIsShuttingDown, + mozilla::ProgressLogger aProgressLogger) { + LOG("profiler_get_profile_json"); + + UniquePtr service = + profiler_code_address_service_for_presymbolication(); + + return WriteProfileToJSONWriter( + aSpliceableChunkedJSONWriter, aSinceTime, aIsShuttingDown, service.get(), + aProgressLogger.CreateSubLoggerFromTo( + 0.1_pc, "profiler_get_profile_json: WriteProfileToJSONWriter started", + 99.9_pc, "profiler_get_profile_json: WriteProfileToJSONWriter done")); +} + +void profiler_get_start_params(int* aCapacity, Maybe* aDuration, + double* aInterval, uint32_t* aFeatures, + Vector* aFilters, + uint64_t* aActiveTabID) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) || + NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) || + NS_WARN_IF(!aFilters)) { + return; + } + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + *aCapacity = 0; + *aDuration = Nothing(); + *aInterval = 0; + *aFeatures = 0; + *aActiveTabID = 0; + aFilters->clear(); + return; + } + + *aCapacity = ActivePS::Capacity(lock).Value(); + *aDuration = ActivePS::Duration(lock); + *aInterval = ActivePS::Interval(lock); + *aFeatures = ActivePS::Features(lock); + *aActiveTabID = ActivePS::ActiveTabID(lock); + + const 
Vector& filters = ActivePS::Filters(lock); + MOZ_ALWAYS_TRUE(aFilters->resize(filters.length())); + for (uint32_t i = 0; i < filters.length(); ++i) { + (*aFilters)[i] = filters[i].c_str(); + } +} + +ProfileBufferControlledChunkManager* profiler_get_controlled_chunk_manager() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + if (NS_WARN_IF(!ActivePS::Exists(lock))) { + return nullptr; + } + return &ActivePS::ControlledChunkManager(lock); +} + +namespace mozilla { + +void GetProfilerEnvVarsForChildProcess( + std::function&& aSetEnv) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + aSetEnv("MOZ_PROFILER_STARTUP", ""); + return; + } + + aSetEnv("MOZ_PROFILER_STARTUP", "1"); + + // If MOZ_PROFILER_SHUTDOWN is defined, make sure it's empty in children, so + // that they don't attempt to write over that file. + if (getenv("MOZ_PROFILER_SHUTDOWN")) { + aSetEnv("MOZ_PROFILER_SHUTDOWN", ""); + } + + // Hidden option to stop Base Profiler, mostly due to Talos intermittents, + // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3 + // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325. + if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) { + aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1"); + } + + auto capacityString = + Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value())); + aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get()); + + // Use AppendFloat instead of Smprintf with %f because the decimal + // separator used by %f is locale-dependent. But the string we produce needs + // to be parseable by strtod, which only accepts the period character as a + // decimal separator. AppendFloat always uses the period character. 
+ nsCString intervalString; + intervalString.AppendFloat(ActivePS::Interval(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.get()); + + auto featuresString = Smprintf("%d", ActivePS::Features(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get()); + + std::string filtersString; + const Vector& filters = ActivePS::Filters(lock); + for (uint32_t i = 0; i < filters.length(); ++i) { + if (i != 0) { + filtersString += ","; + } + filtersString += filters[i]; + } + aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str()); + + auto activeTabIDString = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDString.get()); +} + +} // namespace mozilla + +void profiler_received_exit_profile(const nsACString& aExitProfile) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + return; + } + ActivePS::AddExitProfile(lock, aExitProfile); +} + +Vector profiler_move_exit_profiles() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + Vector profiles; + if (ActivePS::Exists(lock)) { + profiles = ActivePS::MoveExitProfiles(lock); + } + return profiles; +} + +static void locked_profiler_save_profile_to_file( + PSLockRef aLock, const char* aFilename, + const PreRecordedMetaInformation& aPreRecordedMetaInformation, + bool aIsShuttingDown = false) { + nsAutoCString processedFilename(aFilename); + const auto processInsertionIndex = processedFilename.Find("%p"); + if (processInsertionIndex != kNotFound) { + // Replace "%p" with the process id. 
+ nsAutoCString process; + process.AppendInt(profiler_current_process_id().ToNumber()); + processedFilename.Replace(processInsertionIndex, 2, process); + LOG("locked_profiler_save_profile_to_file(\"%s\" -> \"%s\")", aFilename, + processedFilename.get()); + } else { + LOG("locked_profiler_save_profile_to_file(\"%s\")", aFilename); + } + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + std::ofstream stream; + stream.open(processedFilename.get()); + if (stream.is_open()) { + OStreamJSONWriteFunc sw(stream); + SpliceableJSONWriter w(sw, FailureLatchInfallibleSource::Singleton()); + w.Start(); + { + Unused << locked_profiler_stream_json_for_this_process( + aLock, w, /* sinceTime */ 0, aPreRecordedMetaInformation, + aIsShuttingDown, nullptr, ProgressLogger{}); + + w.StartArrayProperty("processes"); + Vector exitProfiles = ActivePS::MoveExitProfiles(aLock); + for (auto& exitProfile : exitProfiles) { + if (!exitProfile.IsEmpty() && exitProfile[0] != '*') { + w.Splice(exitProfile); + } + } + w.EndArray(); + } + w.End(); + + stream.close(); + } +} + +void profiler_save_profile_to_file(const char* aFilename) { + LOG("profiler_save_profile_to_file(%s)", aFilename); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + const auto preRecordedMetaInformation = PreRecordMetaInformation(); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + locked_profiler_save_profile_to_file(lock, aFilename, + preRecordedMetaInformation); +} + +uint32_t profiler_get_available_features() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + return AvailableFeatures(); +} + +Maybe profiler_get_buffer_info() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return Nothing(); + } + + return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo()); +} + +static void PollJSSamplingForCurrentThread() { + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.WithLockedRWOnThread( + 
[](ThreadRegistration::LockedRWOnThread& aThreadData) { + aThreadData.PollJSSampling(); + }); + }); +} + +// When the profiler is started on a background thread, we can't synchronously +// call PollJSSampling on the main thread's ThreadInfo. And the next regular +// call to PollJSSampling on the main thread would only happen once the main +// thread triggers a JS interrupt callback. +// This means that all the JS execution between profiler_start() and the first +// JS interrupt would happen with JS sampling disabled, and we wouldn't get any +// JS function information for that period of time. +// So in order to start JS sampling as soon as possible, we dispatch a runnable +// to the main thread which manually calls PollJSSamplingForCurrentThread(). +// In some cases this runnable will lose the race with the next JS interrupt. +// That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls. +static void TriggerPollJSSamplingOnMainThread() { + nsCOMPtr mainThread; + nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread)); + if (NS_SUCCEEDED(rv) && mainThread) { + nsCOMPtr task = + NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread", + []() { PollJSSamplingForCurrentThread(); }); + SchedulerGroup::Dispatch(TaskCategory::Other, task.forget()); + } +} + +static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity, + double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + uint64_t aActiveTabID, + const Maybe& aDuration) { + TimeStamp profilingStartTime = TimeStamp::Now(); + + if (LOG_TEST) { + LOG("locked_profiler_start"); + LOG("- capacity = %u", unsigned(aCapacity.Value())); + LOG("- duration = %.2f", aDuration ? 
*aDuration : -1); + LOG("- interval = %.2f", aInterval); + LOG("- tab ID = %" PRIu64, aActiveTabID); + +#define LOG_FEATURE(n_, str_, Name_, desc_) \ + if (ProfilerFeature::Has##Name_(aFeatures)) { \ + LOG("- feature = %s", str_); \ + } + + PROFILER_FOR_EACH_FEATURE(LOG_FEATURE) + +#undef LOG_FEATURE + + for (uint32_t i = 0; i < aFilterCount; i++) { + LOG("- threads = %s", aFilters[i]); + } + } + + MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock)); + + // Do this before the Base Profiler is stopped, to keep the existing buffer + // (if any) alive for our use. + if (NS_IsMainThread()) { + mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker(); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("EnsureBufferForMainThreadAddMarker", + &mozilla::base_profiler_markers_detail:: + EnsureBufferForMainThreadAddMarker)); + } + + UniquePtr baseChunkManager; + bool profilersHandOver = false; + if (baseprofiler::profiler_is_active()) { + // Note that we still hold the lock, so the sampler cannot run yet and + // interact negatively with the still-active BaseProfiler sampler. + // Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP. + + // Take ownership of the chunk manager from the Base Profiler, to extend its + // lifetime during the new Gecko Profiler session. Since we're using the + // same core buffer, all the base profiler data remains. 
+ baseChunkManager = baseprofiler::detail::ExtractBaseProfilerChunkManager(); + + if (baseChunkManager) { + profilersHandOver = true; + if (const TimeStamp baseProfilingStartTime = + baseprofiler::detail::GetProfilingStartTime(); + !baseProfilingStartTime.IsNull()) { + profilingStartTime = baseProfilingStartTime; + } + + BASE_PROFILER_MARKER_TEXT( + "Profilers handover", PROFILER, MarkerTiming::IntervalStart(), + "Transition from Base to Gecko Profiler, some data may be missing"); + } + + // Now stop Base Profiler (BP), as further recording will be ignored anyway, + // and so that it won't clash with Gecko Profiler (GP) sampling starting + // after the lock is dropped. + // On Linux this is especially important to do before creating the GP + // sampler, because the BP sampler may send a signal (to stop threads to be + // sampled), which the GP would intercept before its own initialization is + // complete and ready to handle such signals. + // Note that even though `profiler_stop()` doesn't immediately destroy and + // join the sampler thread, it safely deactivates it in such a way that the + // thread will soon exit without doing any actual work. + // TODO: Allow non-sampling profiling to continue. + // TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown. + baseprofiler::profiler_stop(); + } + +#if defined(GP_PLAT_amd64_windows) + InitializeWin64ProfilerHooks(); +#endif + + // Fall back to the default values if the passed-in values are unreasonable. + // We want to be able to store at least one full stack. + PowerOfTwo32 capacity = + (aCapacity.Value() >= + ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry) + ? aCapacity + : PROFILER_DEFAULT_ENTRIES; + Maybe duration = aDuration; + + if (aDuration && *aDuration <= 0) { + duration = Nothing(); + } + + double interval = aInterval > 0 ? 
aInterval : PROFILER_DEFAULT_INTERVAL; + + ActivePS::Create(aLock, profilingStartTime, capacity, interval, aFeatures, + aFilters, aFilterCount, aActiveTabID, duration, + std::move(baseChunkManager)); + + // ActivePS::Create can only succeed or crash. + MOZ_ASSERT(ActivePS::Exists(aLock)); + + // Set up profiling for each registered thread, if appropriate. +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + bool isMainThreadBeingProfiled = false; +#endif + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + const ThreadRegistrationInfo& info = + offThreadRef.UnlockedConstReaderCRef().Info(); + + ThreadProfilingFeatures threadProfilingFeatures = + ActivePS::ProfilingFeaturesForThread(aLock, info); + if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) { + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData = + offThreadRef.GetLockedRWFromAnyThread(); + ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread( + aLock, MakeUnique(info)); + lockedThreadData->SetProfilingFeaturesAndData(threadProfilingFeatures, + profiledThreadData, aLock); + lockedThreadData->GetNewCpuTimeInNs(); + if (ActivePS::FeatureJS(aLock)) { + lockedThreadData->StartJSSampling(ActivePS::JSFlags(aLock)); + if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread = + lockedThreadData.GetLockedRWOnThread(); + lockedRWOnThread) { + // We can manually poll the current thread so it starts sampling + // immediately. + lockedRWOnThread->PollJSSampling(); + } else if (info.IsMainThread()) { + // Dispatch a runnable to the main thread to call + // PollJSSampling(), so that we don't have to wait for the next JS + // interrupt callback in order to start profiling JS.
+ TriggerPollJSSamplingOnMainThread(); + } + } +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (info.IsMainThread()) { + isMainThreadBeingProfiled = true; + } +#endif + lockedThreadData->ReinitializeOnResume(); + if (ActivePS::FeatureJS(aLock) && lockedThreadData->GetJSContext()) { + profiledThreadData->NotifyReceivedJSContext(0); + } + } + } + + // Setup support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(aLock)) { + int javaInterval = interval; + // Java sampling doesn't accurately keep up with the sampling rate that is + // lower than 1ms. + if (javaInterval < 1) { + javaInterval = 1; + } + + JNIEnv* env = jni::GetEnvForThread(); + const auto& filters = ActivePS::Filters(aLock); + jni::ObjectArray::LocalRef javaFilters = + jni::ObjectArray::New(filters.length()); + for (size_t i = 0; i < filters.length(); i++) { + javaFilters->SetElement(i, jni::StringParam(filters[i].data(), env)); + } + + // Send the interval-relative entry count, but we have 100000 hard cap in + // the java code, it can't be more than that. + java::GeckoJavaSampler::Start( + javaFilters, javaInterval, + std::round((double)(capacity.Value()) * interval / + (double)(javaInterval))); + } +#endif + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (ActivePS::FeatureNativeAllocations(aLock)) { + if (isMainThreadBeingProfiled) { + mozilla::profiler::enable_native_allocations(); + } else { + NS_WARNING( + "The nativeallocations feature is turned on, but the main thread is " + "not being profiled. The allocations are only stored on the main " + "thread."); + } + } +#endif + + if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) { + StartAudioCallbackTracing(); + } + + // At the very end, set up RacyFeatures. 
+ RacyFeatures::SetActive(ActivePS::Features(aLock)); + + if (profilersHandOver) { + PROFILER_MARKER_UNTYPED("Profilers handover", PROFILER, + MarkerTiming::IntervalEnd()); + } +} + +RefPtr profiler_start(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, + uint64_t aActiveTabID, + const Maybe& aDuration) { + LOG("profiler_start"); + + ProfilerParent::ProfilerWillStopIfStarted(); + + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + // Reset the current state if the profiler is running. + if (ActivePS::Exists(lock)) { + // Note: Not invoking callbacks with ProfilingState::Stopping, because + // we're under lock, and also it would not be useful: Any profiling data + // will be discarded, and we're immediately restarting the profiler below + // and then notifying ProfilingState::Started. + samplerThread = locked_profiler_stop(lock); + } + + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aActiveTabID, aDuration); + } + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // Start counting memory allocations (outside of lock because this may call + // profiler_add_sampled_counter which would attempt to take the lock.) + ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks()); +#endif + + invoke_profiler_state_change_callbacks(ProfilingState::Started); + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. 
+ if (samplerThread) { + Unused << ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + delete samplerThread; + } + return NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount, aActiveTabID); +} + +void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, uint64_t aActiveTabID, + const Maybe& aDuration) { + LOG("profiler_ensure_started"); + + ProfilerParent::ProfilerWillStopIfStarted(); + + bool startedProfiler = false; + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + if (ActivePS::Exists(lock)) { + // The profiler is active. + if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount, aActiveTabID)) { + // Stop and restart with different settings. + // Note: Not invoking callbacks with ProfilingState::Stopping, because + // we're under lock, and also it would not be useful: Any profiling data + // will be discarded, and we're immediately restarting the profiler + // below and then notifying ProfilingState::Started. + samplerThread = locked_profiler_stop(lock); + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aActiveTabID, aDuration); + startedProfiler = true; + } + } else { + // The profiler is stopped. + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aActiveTabID, aDuration); + startedProfiler = true; + } + } + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. 
+ if (samplerThread) { + Unused << ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + delete samplerThread; + } + + if (startedProfiler) { + invoke_profiler_state_change_callbacks(ProfilingState::Started); + + Unused << NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount, aActiveTabID); + } +} + +// Shuts down the active session while the caller holds the profiler lock, and +// returns the SamplerThread obtained from ActivePS::Destroy(). Requires both +// CorePS and ActivePS to exist. +[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) { + LOG("locked_profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + // At the very start, clear RacyFeatures. + RacyFeatures::SetInactive(); + + if (ActivePS::FeatureAudioCallbackTracing(aLock)) { + StopAudioCallbackTracing(); + } + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(aLock)) { + java::GeckoJavaSampler::Stop(); + } +#endif + + // Remove support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(nullptr, nullptr); + + // Stop sampling live threads. + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + if (offThreadRef.UnlockedRWForLockedProfilerRef().ProfilingFeatures() == + ThreadProfilingFeatures::NotProfiled) { + continue; + } + + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData = + offThreadRef.GetLockedRWFromAnyThread(); + + lockedThreadData->ClearProfilingFeaturesAndData(aLock); + + if (ActivePS::FeatureJS(aLock)) { + lockedThreadData->StopJSSampling(); + if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread = + lockedThreadData.GetLockedRWOnThread(); + lockedRWOnThread) { + // We are on the thread, we can manually poll the current thread so it + // stops profiling immediately. + lockedRWOnThread->PollJSSampling(); + } else if (lockedThreadData->Info().IsMainThread()) { + // Dispatch a runnable to the main thread to call PollJSSampling(), + // so that we don't have to wait for the next JS interrupt callback in + // order to stop profiling JS.
+ TriggerPollJSSamplingOnMainThread(); + } + } + } + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (ActivePS::FeatureNativeAllocations(aLock)) { + mozilla::profiler::disable_native_allocations(); + } +#endif + + // The Stop() call doesn't actually stop Run(); that happens in this + // function's caller when the sampler thread is destroyed. Stop() just gives + // the SamplerThread a chance to do some cleanup with gPSMutex locked. + SamplerThread* samplerThread = ActivePS::Destroy(aLock); + samplerThread->Stop(aLock); + + if (NS_IsMainThread()) { + mozilla::base_profiler_markers_detail:: + ReleaseBufferForMainThreadAddMarker(); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("ReleaseBufferForMainThreadAddMarker", + &mozilla::base_profiler_markers_detail:: + ReleaseBufferForMainThreadAddMarker)); + } + + return samplerThread; +} + +RefPtr profiler_stop() { + LOG("profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (profiler_is_active()) { + invoke_profiler_state_change_callbacks(ProfilingState::Stopping); + } + + ProfilerParent::ProfilerWillStopIfStarted(); + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // Remove the hooks early, as native allocations (if they are on) can be + // quite expensive. + mozilla::profiler::remove_memory_hooks(); +#endif + + SamplerThread* samplerThread; + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + samplerThread = locked_profiler_stop(lock); + } + + // We notify observers with gPSMutex unlocked. Otherwise we might get a + // deadlock, if code run by these functions calls a profiler function that + // locks gPSMutex, for example when it wants to insert a marker. + // (This has been seen in practise in bug 1346356, when we were still firing + // these notifications synchronously.) 
+ RefPtr promise = ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + + // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we + // would be waiting here with gPSMutex locked for SamplerThread::Run() to + // return so the join operation within the destructor can complete, but Run() + // needs to lock gPSMutex to return. + // + // Because this call occurs with gPSMutex unlocked, it -- including the final + // iteration of Run()'s loop -- must be able to detect deactivation and return + // in a way that's safe with respect to other gPSMutex-locking operations + // that may have occurred in the meantime. + delete samplerThread; + + return promise; +} + +// Reports whether the active profiling session is paused; returns false when no +// session is active. +bool profiler_is_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsPaused(lock); +} + +// Hands aCallback to ActivePS::AppendPostSamplingCallback() while holding the +// profiler lock, and returns that call's result. +/* [[nodiscard]] */ bool profiler_callback_after_sampling( + PostSamplingCallback&& aCallback) { + LOG("profiler_callback_after_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + return ActivePS::AppendPostSamplingCallback(lock, std::move(aCallback)); +} + +RefPtr profiler_pause() { + LOG("profiler_pause"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + invoke_profiler_state_change_callbacks(ProfilingState::Pausing); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused yet, so this is the first pause, let Java know. + // TODO: Distinguish Pause and PauseSampling in Java. + java::GeckoJavaSampler::PauseSampling(); + } +#endif + + RacyFeatures::SetPaused(); + ActivePS::SetIsPaused(lock, true); + ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time())); + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
+ RefPtr promise = ProfilerParent::ProfilerPaused(); + NotifyObservers("profiler-paused"); + return promise; +} + +RefPtr profiler_resume() { + LOG("profiler_resume"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::Resume(profiler_time())); + ActivePS::SetIsPaused(lock, false); + RacyFeatures::SetUnpaused(); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused anymore, so this is the last unpause, let Java know. + // TODO: Distinguish Unpause and UnpauseSampling in Java. + java::GeckoJavaSampler::UnpauseSampling(); + } +#endif + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. + RefPtr promise = ProfilerParent::ProfilerResumed(); + NotifyObservers("profiler-resumed"); + + invoke_profiler_state_change_callbacks(ProfilingState::Resumed); + + return promise; +} + +bool profiler_is_sampling_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsSamplingPaused(lock); +} + +RefPtr profiler_pause_sampling() { + LOG("profiler_pause_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused yet, so this is the first pause, let Java know. + // TODO: Distinguish Pause and PauseSampling in Java. 
+ java::GeckoJavaSampler::PauseSampling(); + } +#endif + + RacyFeatures::SetSamplingPaused(); + ActivePS::SetIsSamplingPaused(lock, true); + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::PauseSampling(profiler_time())); + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. + RefPtr promise = ProfilerParent::ProfilerPausedSampling(); + NotifyObservers("profiler-paused-sampling"); + return promise; +} + +RefPtr profiler_resume_sampling() { + LOG("profiler_resume_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::ResumeSampling(profiler_time())); + ActivePS::SetIsSamplingPaused(lock, false); + RacyFeatures::SetSamplingUnpaused(); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused anymore, so this is the last unpause, let Java know. + // TODO: Distinguish Unpause and UnpauseSampling in Java. + java::GeckoJavaSampler::UnpauseSampling(); + } +#endif + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. + RefPtr promise = ProfilerParent::ProfilerResumedSampling(); + NotifyObservers("profiler-resumed-sampling"); + return promise; +} + +bool profiler_feature_active(uint32_t aFeature) { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // This function is hot enough that we use RacyFeatures, not ActivePS. + return RacyFeatures::IsActiveWithFeature(aFeature); +} + +bool profiler_active_without_feature(uint32_t aFeature) { + // This function runs both on and off the main thread. + + // This function is hot enough that we use RacyFeatures, not ActivePS. 
+ return RacyFeatures::IsActiveWithoutFeature(aFeature); +} + +void profiler_write_active_configuration(JSONWriter& aWriter) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + ActivePS::WriteActiveConfiguration(lock, aWriter); +} + +void profiler_add_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + locked_profiler_add_sampled_counter(lock, aCounter); +} + +void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + locked_profiler_remove_sampled_counter(lock, aCounter); +} + +ProfilingStack* profiler_register_thread(const char* aName, + void* aGuessStackTop) { + DEBUG_LOG("profiler_register_thread(%s)", aName); + + // This will call `ThreadRegistry::Register()` (see below). + return ThreadRegistration::RegisterThread(aName, aGuessStackTop); +} + +/* static */ +void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) { + // Set the thread name (except for the main thread, which is controlled + // elsewhere, and influences the process name on some systems like Linux). + if (!aOnThreadRef.UnlockedConstReaderCRef().Info().IsMainThread()) { + // Make sure we have a nsThread wrapper for the current thread, and that + // NSPR knows its name. + (void)NS_GetCurrentThread(); + NS_SetCurrentThreadName( + aOnThreadRef.UnlockedConstReaderCRef().Info().Name()); + } + + PSAutoLock lock; + + { + RegistryLockExclusive lock{sRegistryMutex}; + MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef})); + } + + if (!CorePS::Exists()) { + // CorePS has not been created yet. + // If&when that happens, it will handle already-registered threads then. + return; + } + + (void)locked_register_thread(lock, OffThreadRef{aOnThreadRef}); +} + +void profiler_unregister_thread() { + // This will call `ThreadRegistry::Unregister()` (see below). 
+ ThreadRegistration::UnregisterThread(); +} + +static void locked_unregister_thread( + PSLockRef lock, ThreadRegistration::OnThreadRef aOnThreadRef) { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut + // down. + return; + } + + // We don't call StopJSSampling() here; there's no point doing that for a JS + // thread that is in the process of disappearing. + + ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData = + aOnThreadRef.GetLockedRWOnThread(); + + ProfiledThreadData* profiledThreadData = + lockedThreadData->GetProfiledThreadData(lock); + lockedThreadData->ClearProfilingFeaturesAndData(lock); + + MOZ_RELEASE_ASSERT( + lockedThreadData->Info().ThreadId() == profiler_current_thread_id(), + "Thread being unregistered has changed its TID"); + + DEBUG_LOG("profiler_unregister_thread: %s", lockedThreadData->Info().Name()); + + if (profiledThreadData && ActivePS::Exists(lock)) { + ActivePS::UnregisterThread(lock, profiledThreadData); + } +} + +/* static */ +void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) { + PSAutoLock psLock; + locked_unregister_thread(psLock, aOnThreadRef); + + RegistryLockExclusive lock{sRegistryMutex}; + for (OffThreadRef& thread : sRegistryContainer) { + if (thread.IsPointingAt(*aOnThreadRef.mThreadRegistration)) { + sRegistryContainer.erase(&thread); + break; + } + } +} + +void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID, + const nsCString& aUrl, + uint64_t aEmbedderInnerWindowID, + bool aIsPrivateBrowsing) { + DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 + ", %s)", + aTabID, aInnerWindowID, aUrl.get(), aEmbedderInnerWindowID, + aIsPrivateBrowsing ? "true" : "false"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + // When a Browsing context is first loaded, the first url loaded in it will be + // about:blank. 
Because of that, this call keeps the first non-about:blank + // registration of window and discards the previous one. + RefPtr pageInfo = new PageInformation( + aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID, aIsPrivateBrowsing); + CorePS::AppendRegisteredPage(lock, std::move(pageInfo)); + + // After appending the given page to CorePS, look for the expired + // pages and remove them if there are any. + if (ActivePS::Exists(lock)) { + ActivePS::DiscardExpiredPages(lock); + } +} + +// Unregisters the page identified by aRegisteredInnerWindowID. Does nothing if +// CorePS no longer exists. +void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) { + PSAutoLock lock; + + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. + return; + } + + // During unregistration, if the profiler is active, we have to keep the + // page information since there may be some markers associated with the given + // page. But if the profiler is not active, we have no reason to keep the + // page information here because there can't be any marker associated with it. + if (ActivePS::Exists(lock)) { + ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID); + } else { + CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID); + } +} + +void profiler_clear_all_pages() { + { + PSAutoLock lock; + + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut + // down. + return; + } + + CorePS::ClearRegisteredPages(lock); + if (ActivePS::Exists(lock)) { + ActivePS::ClearUnregisteredPages(lock); + } + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
+ ProfilerParent::ClearAllPages(); +} + +namespace geckoprofiler::markers::detail { + +Maybe profiler_get_inner_window_id_from_docshell( + nsIDocShell* aDocshell) { + Maybe innerWindowID = Nothing(); + if (aDocshell) { + auto outerWindow = aDocshell->GetWindow(); + if (outerWindow) { + auto innerWindow = outerWindow->GetCurrentInnerWindow(); + if (innerWindow) { + innerWindowID = Some(innerWindow->WindowID()); + } + } + } + return innerWindowID; +} + +} // namespace geckoprofiler::markers::detail + +namespace geckoprofiler::markers { + +struct CPUAwakeMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("Awake"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + int64_t aCPUId +#ifdef GP_OS_darwin + , + uint32_t aQoS +#endif +#ifdef GP_OS_windows + , + int32_t aAbsolutePriority, + int32_t aRelativePriority, + int32_t aCurrentPriority +#endif + ) { +#ifndef GP_PLAT_arm64_darwin + aWriter.IntProperty("CPU Id", aCPUId); +#endif +#ifdef GP_OS_windows + if (aAbsolutePriority) { + aWriter.IntProperty("absPriority", aAbsolutePriority); + } + if (aCurrentPriority) { + aWriter.IntProperty("curPriority", aCurrentPriority); + } + aWriter.IntProperty("priority", aRelativePriority); +#endif +#ifdef GP_OS_darwin + const char* QoS = ""; + switch (aQoS) { + case QOS_CLASS_USER_INTERACTIVE: + QoS = "User Interactive"; + break; + case QOS_CLASS_USER_INITIATED: + QoS = "User Initiated"; + break; + case QOS_CLASS_DEFAULT: + QoS = "Default"; + break; + case QOS_CLASS_UTILITY: + QoS = "Utility"; + break; + case QOS_CLASS_BACKGROUND: + QoS = "Background"; + break; + default: + QoS = "Unspecified"; + } + + aWriter.StringProperty("QoS", + ProfilerString8View::WrapNullTerminatedString(QoS)); +#endif + } + + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormat("CPU Time", MS::Format::Duration); +#ifndef GP_PLAT_arm64_darwin + 
schema.AddKeyFormat("CPU Id", MS::Format::Integer); + schema.SetTableLabel("Awake - CPU Id = {marker.data.CPU Id}"); +#endif +#ifdef GP_OS_windows + schema.AddKeyLabelFormat("priority", "Relative Thread Priority", + MS::Format::Integer); + schema.AddKeyLabelFormat("absPriority", "Base Thread Priority", + MS::Format::Integer); + schema.AddKeyLabelFormat("curPriority", "Current Thread Priority", + MS::Format::Integer); +#endif +#ifdef GP_OS_darwin + schema.AddKeyLabelFormat("QoS", "Quality of Service", MS::Format::String); +#endif + return schema; + } +}; + +struct CPUAwakeMarkerEnd : public CPUAwakeMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("AwakeEnd"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + int64_t aCPUTimeNs) { + if (aCPUTimeNs) { + constexpr double NS_PER_MS = 1'000'000; + aWriter.DoubleProperty("CPU Time", double(aCPUTimeNs) / NS_PER_MS); + } + } +}; + +} // namespace geckoprofiler::markers + +void profiler_mark_thread_asleep() { + if (!profiler_thread_is_being_profiled_for_markers()) { + return; + } + + uint64_t cpuTimeNs = ThreadRegistration::WithOnThreadRefOr( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + return aOnThreadRef.UnlockedConstReaderAndAtomicRWRef() + .GetNewCpuTimeInNs(); + }, + 0); + PROFILER_MARKER("Awake", OTHER, MarkerTiming::IntervalEnd(), + CPUAwakeMarkerEnd, cpuTimeNs); +} + +void profiler_thread_sleep() { + profiler_mark_thread_asleep(); + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.UnlockedConstReaderAndAtomicRWRef().SetSleeping(); + }); +} + +#if defined(GP_OS_windows) +# if !defined(__MINGW32__) +enum { + ThreadBasicInformation, +}; +# endif + +struct THREAD_BASIC_INFORMATION { + NTSTATUS ExitStatus; + PVOID TebBaseAddress; + CLIENT_ID ClientId; + KAFFINITY AffMask; + DWORD Priority; + DWORD BasePriority; +}; +#endif + +static mozilla::Atomic gWakeCount( + 0); + +namespace 
geckoprofiler::markers { +// Payload of the "Thread Wake-ups" marker: a wake-up count ("Count") plus the +// process-type string it was recorded for ("label"). +struct WakeUpCountMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("WakeUpCount"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + int32_t aCount, + const ProfilerString8View& aType) { + aWriter.IntProperty("Count", aCount); + aWriter.StringProperty("label", aType); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormat("Count", MS::Format::Integer); + schema.SetTooltipLabel("{marker.name} - {marker.data.label}"); + // The key must be spelled exactly like the "Count" property streamed by + // StreamJSONMarkerData() above. + schema.SetTableLabel( + "{marker.name} - {marker.data.label}: {marker.data.Count}"); + return schema; + } +}; +} // namespace geckoprofiler::markers + +// Reports the thread wake-ups accumulated in gWakeCount since the previous +// call, to the mozilla::glean power metrics and as a "Thread Wake-ups" marker. +// A delta that does not fit the int32_t range check is not reported, but is +// still added to the running baseline. +void profiler_record_wakeup_count(const nsACString& aProcessType) { + static uint64_t previousThreadWakeCount = 0; + + uint64_t newWakeups = gWakeCount - previousThreadWakeCount; + if (newWakeups > 0) { + if (newWakeups < std::numeric_limits::max()) { + int32_t newWakeups32 = int32_t(newWakeups); + mozilla::glean::power::total_thread_wakeups.Add(newWakeups32); + mozilla::glean::power::wakeups_per_process_type.Get(aProcessType) + .Add(newWakeups32); + PROFILER_MARKER("Thread Wake-ups", OTHER, {}, WakeUpCountMarker, + newWakeups32, aProcessType); + } + + previousThreadWakeCount += newWakeups; + } + +#ifdef NIGHTLY_BUILD + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + const ThreadRegistry::UnlockedConstReaderAndAtomicRW& threadData = + offThreadRef.UnlockedConstReaderAndAtomicRWRef(); + threadData.RecordWakeCount(); + } +#endif +} + +void profiler_mark_thread_awake() { + ++gWakeCount; + if (!profiler_thread_is_being_profiled_for_markers()) { + return; + } + + int64_t cpuId = 0; +#if defined(GP_OS_windows) + cpuId = GetCurrentProcessorNumber(); +#elif defined(GP_OS_darwin) +# ifdef GP_PLAT_amd64_darwin + unsigned int eax, ebx, ecx, edx; +
__cpuid_count(1, 0, eax, ebx, ecx, edx); + // Check if we have an APIC. + if ((edx & (1 << 9))) { + // APIC ID is bits 24-31 of EBX + cpuId = ebx >> 24; + } +# endif +#else + cpuId = sched_getcpu(); +#endif + +#if defined(GP_OS_windows) + LONG priority; + static const auto get_thread_information_fn = + reinterpret_cast(::GetProcAddress( + ::GetModuleHandle(L"Kernel32.dll"), "GetThreadInformation")); + + if (!get_thread_information_fn || + !get_thread_information_fn(GetCurrentThread(), ThreadAbsoluteCpuPriority, + &priority, sizeof(priority))) { + priority = 0; + } + + static const auto nt_query_information_thread_fn = + reinterpret_cast(::GetProcAddress( + ::GetModuleHandle(L"ntdll.dll"), "NtQueryInformationThread")); + + LONG currentPriority = 0; + if (nt_query_information_thread_fn) { + THREAD_BASIC_INFORMATION threadInfo; + auto status = (*nt_query_information_thread_fn)( + GetCurrentThread(), (THREADINFOCLASS)ThreadBasicInformation, + &threadInfo, sizeof(threadInfo), NULL); + if (NT_SUCCESS(status)) { + currentPriority = threadInfo.Priority; + } + } +#endif + PROFILER_MARKER( + "Awake", OTHER, MarkerTiming::IntervalStart(), CPUAwakeMarker, cpuId +#if defined(GP_OS_darwin) + , + qos_class_self() +#endif +#if defined(GP_OS_windows) + , + priority, GetThreadPriority(GetCurrentThread()), currentPriority +#endif + ); +} + +void profiler_thread_wake() { + profiler_mark_thread_awake(); + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.UnlockedConstReaderAndAtomicRWRef().SetAwake(); + }); +} + +void profiler_js_interrupt_callback() { + // This function runs on JS threads being sampled. 
+ PollJSSamplingForCurrentThread(); +} + +double profiler_time() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime(); + return delta.ToMilliseconds(); +} + +bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer, + StackCaptureOptions aCaptureOptions) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (!profiler_is_active() || + aCaptureOptions == StackCaptureOptions::NoStack) { + return false; + } + + return ThreadRegistration::WithOnThreadRefOr( + [&](ThreadRegistration::OnThreadRef aOnThreadRef) { + mozilla::Maybe maybeFeatures = + RacyFeatures::FeaturesIfActiveAndUnpaused(); + if (!maybeFeatures) { + return false; + } + + ProfileBuffer profileBuffer(aChunkedBuffer); + + Registers regs; +#if defined(HAVE_NATIVE_UNWIND) + REGISTERS_SYNC_POPULATE(regs); +#else + regs.Clear(); +#endif + + DoSyncSample(*maybeFeatures, + aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef(), + TimeStamp::Now(), regs, profileBuffer, aCaptureOptions); + + return true; + }, + // If this was called from a non-registered thread, return false and do no + // more work. This can happen from a memory hook. + false); +} + +UniquePtr profiler_capture_backtrace() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + AUTO_PROFILER_LABEL("profiler_capture_backtrace", PROFILER); + + // Quick is-active and feature check before allocating a buffer. + // If NoMarkerStacks is set, we don't want to capture a backtrace. 
+ if (!profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) { + return nullptr; + } + + auto buffer = MakeUnique( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + MakeUnique( + ProfileBufferChunkManager::scExpectedMaximumStackSize)); + + if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) { + return nullptr; + } + + return buffer; +} + +UniqueProfilerBacktrace profiler_get_backtrace() { + UniquePtr buffer = profiler_capture_backtrace(); + + if (!buffer) { + return nullptr; + } + + return UniqueProfilerBacktrace( + new ProfilerBacktrace("SyncProfile", std::move(buffer))); +} + +void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) { + delete aBacktrace; +} + +bool profiler_is_locked_on_current_thread() { + // This function is used to help users avoid calling `profiler_...` functions + // when the profiler may already have a lock in place, which would prevent a + // 2nd recursive lock (resulting in a crash or a never-ending wait), or a + // deadlock between any two mutexes. So we must return `true` for any of: + // - The main profiler mutex, used by most functions, and/or + // - The buffer mutex, used directly in some functions without locking the + // main mutex, e.g., marker-related functions. + // - The ProfilerParent or ProfilerChild mutex, used to store and process + // buffer chunk updates. + return PSAutoLock::IsLockedOnCurrentThread() || + ThreadRegistry::IsRegistryMutexLockedOnCurrentThread() || + ThreadRegistration::IsDataMutexLockedOnCurrentThread() || + profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread() || + ProfilerParent::IsLockedOnCurrentThread() || + ProfilerChild::IsLockedOnCurrentThread(); +} + +void profiler_set_js_context(JSContext* aCx) { + MOZ_ASSERT(aCx); + ThreadRegistration::WithOnThreadRef( + [&](ThreadRegistration::OnThreadRef aOnThreadRef) { + // The profiler mutex must be locked before the ThreadRegistration's. 
+ PSAutoLock lock; + aOnThreadRef.WithLockedRWOnThread( + [&](ThreadRegistration::LockedRWOnThread& aThreadData) { + aThreadData.SetJSContext(aCx); + + if (!ActivePS::Exists(lock) || !ActivePS::FeatureJS(lock)) { + return; + } + + // This call is on-thread, so we can call PollJSSampling() to + // start JS sampling immediately. + aThreadData.PollJSSampling(); + + if (ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(lock); + profiledThreadData) { + profiledThreadData->NotifyReceivedJSContext( + ActivePS::Buffer(lock).BufferRangeEnd()); + } + }); + }); +} + +void profiler_clear_js_context() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + JSContext* cx = + aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef().GetJSContext(); + if (!cx) { + return; + } + + // The profiler mutex must be locked before the ThreadRegistration's. + PSAutoLock lock; + ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData = + aOnThreadRef.GetLockedRWOnThread(); + + if (ProfiledThreadData* profiledThreadData = + lockedThreadData->GetProfiledThreadData(lock); + profiledThreadData && ActivePS::Exists(lock) && + ActivePS::FeatureJS(lock)) { + profiledThreadData->NotifyAboutToLoseJSContext( + cx, CorePS::ProcessStartTime(), ActivePS::Buffer(lock)); + + // Notify the JS context that profiling for this context has + // stopped. Do this by calling StopJSSampling and PollJSSampling + // before nulling out the JSContext. + lockedThreadData->StopJSSampling(); + lockedThreadData->PollJSSampling(); + + lockedThreadData->ClearJSContext(); + + // Tell the thread that we'd like to have JS sampling on this + // thread again, once it gets a new JSContext (if ever). + lockedThreadData->StartJSSampling(ActivePS::JSFlags(lock)); + } else { + // This thread is not being profiled or JS profiling is off, we only + // need to clear the context pointer. 
+ lockedThreadData->ClearJSContext(); + } + }); +} + +static void profiler_suspend_and_sample_thread( + const PSAutoLock* aLockIfAsynchronousSampling, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + JsFrame* aJsFrames, uint32_t aFeatures, ProfilerStackCollector& aCollector, + bool aSampleNative) { + const ThreadRegistrationInfo& info = aThreadData.Info(); + + if (info.IsMainThread()) { + aCollector.SetIsMainThread(); + } + + // Allocate the space for the native stack + NativeStack nativeStack; + + auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) { + // The target thread is now suspended. Collect a native backtrace, + // and call the callback. + StackWalkControl* stackWalkControlIfSupported = nullptr; +#if defined(HAVE_FASTINIT_NATIVE_UNWIND) + StackWalkControl stackWalkControl; + if constexpr (StackWalkControl::scIsSupported) { + if (aSampleNative) { + stackWalkControlIfSupported = &stackWalkControl; + } + } +#endif + const uint32_t jsFramesCount = + aJsFrames ? ExtractJsFrames(!aLockIfAsynchronousSampling, aThreadData, + aRegs, aCollector, aJsFrames, + stackWalkControlIfSupported) + : 0; + +#if defined(HAVE_FASTINIT_NATIVE_UNWIND) + if (aSampleNative) { + // We can only use FramePointerStackWalk or MozStackWalk from + // suspend_and_sample_thread as other stackwalking methods may not be + // initialized. 
+# if defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(aThreadData, aRegs, nativeStack, + stackWalkControlIfSupported); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(aThreadData, aRegs, nativeStack, + stackWalkControlIfSupported); +# else +# error "Invalid configuration" +# endif + + MergeStacks(aFeatures, !aLockIfAsynchronousSampling, aThreadData, aRegs, + nativeStack, aCollector, aJsFrames, jsFramesCount); + } else +#endif + { + MergeStacks(aFeatures, !aLockIfAsynchronousSampling, aThreadData, aRegs, + nativeStack, aCollector, aJsFrames, jsFramesCount); + + aCollector.CollectNativeLeafAddr((void*)aRegs.mPC); + } + }; + + if (!aLockIfAsynchronousSampling) { + // Sampling the current thread, do NOT suspend it! + Registers regs; +#if defined(HAVE_NATIVE_UNWIND) + REGISTERS_SYNC_POPULATE(regs); +#else + regs.Clear(); +#endif + collectStack(regs, TimeStamp::Now()); + } else { + // Suspend, sample, and then resume the target thread. + Sampler sampler(*aLockIfAsynchronousSampling); + TimeStamp now = TimeStamp::Now(); + sampler.SuspendAndSampleAndResumeThread(*aLockIfAsynchronousSampling, + aThreadData, now, collectStack); + + // NOTE: Make sure to disable the sampler before it is destroyed, in + // case the profiler is running at the same time. + sampler.Disable(*aLockIfAsynchronousSampling); + } +} + +// NOTE: aCollector's methods will be called while the target thread is paused. +// Doing things in those methods like allocating -- which may try to claim +// locks -- is a surefire way to deadlock. +void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId, + uint32_t aFeatures, + ProfilerStackCollector& aCollector, + bool aSampleNative /* = true */) { + if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) { + // Sampling the current thread. Get its information from the TLS (no locking + // required.) 
+ ThreadRegistration::WithOnThreadRef( + [&](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread( + [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& + aThreadData) { + if (!aThreadData.GetJSContext()) { + // No JSContext, there is no JS frame buffer (and no need for + // it). + profiler_suspend_and_sample_thread( + /* aLockIfAsynchronousSampling = */ nullptr, aThreadData, + /* aJsFrames = */ nullptr, aFeatures, aCollector, + aSampleNative); + } else { + // JSContext is present, we need to lock the thread data to + // access the JS frame buffer. + aOnThreadRef.WithConstLockedRWOnThread( + [&](const ThreadRegistration::LockedRWOnThread& + aLockedThreadData) { + profiler_suspend_and_sample_thread( + /* aLockIfAsynchronousSampling = */ nullptr, + aThreadData, aLockedThreadData.GetJsFrameBuffer(), + aFeatures, aCollector, aSampleNative); + }); + } + }); + }); + } else { + // Lock the profiler before accessing the ThreadRegistry. + PSAutoLock lock; + ThreadRegistry::WithOffThreadRef( + aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) { + aOffThreadRef.WithLockedRWFromAnyThread( + [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& + aThreadData) { + JsFrameBuffer& jsFrames = CorePS::JsFrames(lock); + profiler_suspend_and_sample_thread(&lock, aThreadData, jsFrames, + aFeatures, aCollector, + aSampleNative); + }); + }); + } +} + +// END externally visible functions +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/platform.h b/tools/profiler/core/platform.h new file mode 100644 index 0000000000..59d2c7ff42 --- /dev/null +++ b/tools/profiler/core/platform.h @@ -0,0 +1,381 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. 
+ +#ifndef TOOLS_PLATFORM_H_ +#define TOOLS_PLATFORM_H_ + +#include "PlatformMacros.h" + +#include "json/json.h" +#include "mozilla/Atomics.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/Logging.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/ProfileBufferEntrySerialization.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProfilerUtils.h" +#include "mozilla/ProgressLogger.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "nsString.h" +#include "shared-libraries.h" + +#include +#include +#include + +class ProfilerCodeAddressService; + +namespace mozilla { +struct SymbolTable; +} + +extern mozilla::LazyLogModule gProfilerLog; + +// These are for MOZ_LOG="prof:3" or higher. It's the default logging level for +// the profiler, and should be used sparingly. +#define LOG_TEST MOZ_LOG_TEST(gProfilerLog, mozilla::LogLevel::Info) +#define LOG(arg, ...) \ + MOZ_LOG(gProfilerLog, mozilla::LogLevel::Info, \ + ("[%" PRIu64 "] " arg, \ + uint64_t(profiler_current_process_id().ToNumber()), ##__VA_ARGS__)) + +// These are for MOZ_LOG="prof:4" or higher. It should be used for logging that +// is somewhat more verbose than LOG. +#define DEBUG_LOG_TEST MOZ_LOG_TEST(gProfilerLog, mozilla::LogLevel::Debug) +#define DEBUG_LOG(arg, ...) \ + MOZ_LOG(gProfilerLog, mozilla::LogLevel::Debug, \ + ("[%" PRIu64 "] " arg, \ + uint64_t(profiler_current_process_id().ToNumber()), ##__VA_ARGS__)) + +typedef uint8_t* Address; + +// Stringify the given JSON value, in the most compact format. +// Note: Numbers are limited to a precision of 6 decimal digits, so that +// timestamps in ms have a precision in ns. +Json::String ToCompactString(const Json::Value& aJsonValue); + +// Profiling log stored in a Json::Value. The actual log only exists while the +// profiler is running, and will be inserted at the end of the JSON profile. 
+class ProfilingLog { + public: + // These will be called by ActivePS when the profiler starts/stops. + static void Init(); + static void Destroy(); + + // Access the profiling log JSON object, in order to modify it. + // Only calls the given function if the profiler is active. + // Thread-safe. But `aF` must not call other locking profiler functions. + // This is intended to capture some internal logging that doesn't belong in + // other places like markers. The log is accessible through the JS console on + // profiler.firefox.com, in the `profile.profilingLog` object; the data format + // is intentionally not defined, and not intended to be shown in the + // front-end. + // Please use caution not to output too much data. + template + static void Access(F&& aF) { + mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex}; + if (gLog) { + std::forward(aF)(*gLog); + } + } + +#define DURATION_JSON_SUFFIX "_ms" + + // Convert a TimeDuration to the value to be stored in the log. + // Use DURATION_JSON_SUFFIX as suffix in the property name. + static Json::Value Duration(const mozilla::TimeDuration& aDuration) { + return Json::Value{aDuration.ToMilliseconds()}; + } + +#define TIMESTAMP_JSON_SUFFIX "_TSms" + + // Convert a TimeStamp to the value to be stored in the log. + // Use TIMESTAMP_JSON_SUFFIX as suffix in the property name. + static Json::Value Timestamp( + const mozilla::TimeStamp& aTimestamp = mozilla::TimeStamp::Now()) { + if (aTimestamp.IsNull()) { + return Json::Value{0.0}; + } + return Duration(aTimestamp - mozilla::TimeStamp::ProcessCreation()); + } + + static bool IsLockedOnCurrentThread(); + + private: + static mozilla::baseprofiler::detail::BaseProfilerMutex gMutex; + static mozilla::UniquePtr gLog; +}; + +// ---------------------------------------------------------------------------- +// Miscellaneous + +// If positive, skip stack-sampling in the sampler thread loop. 
+// Users should increment it atomically when samplings should be avoided, and +// later decrement it back. Multiple uses can overlap. +// There could be a sampling in progress when this is first incremented, so if +// it is critical to prevent any sampling, lock the profiler mutex instead. +// Relaxed ordering, because it's used to request that the profiler pause +// future sampling; this is not time critical, nor dependent on anything else. +extern mozilla::Atomic gSkipSampling; + +void AppendSharedLibraries(mozilla::JSONWriter& aWriter, + const SharedLibraryInfo& aInfo); + +// Convert the array of strings to a bitfield. +uint32_t ParseFeaturesFromStringArray(const char** aFeatures, + uint32_t aFeatureCount, + bool aIsStartup = false); + +// Add the begin/end 'Awake' markers for the thread. +void profiler_mark_thread_awake(); + +void profiler_mark_thread_asleep(); + +[[nodiscard]] bool profiler_get_profile_json( + SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter, + double aSinceTime, bool aIsShuttingDown, + mozilla::ProgressLogger aProgressLogger); + +// Flags to conveniently track various JS instrumentations. +enum class JSInstrumentationFlags { + StackSampling = 0x1, + Allocations = 0x2, +}; + +// Write out the information of the active profiling configuration. +void profiler_write_active_configuration(mozilla::JSONWriter& aWriter); + +// Extract all received exit profiles that have not yet expired (i.e., they +// still intersect with this process' buffer range). +mozilla::Vector profiler_move_exit_profiles(); + +// If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we return a new +// ProfilerCodeAddressService object to use for local symbolication of profiles. +// This is off by default, and mainly intended for local development. +mozilla::UniquePtr +profiler_code_address_service_for_presymbolication(); + +extern "C" { +// This function is defined in the profiler rust module at +// tools/profiler/rust-helper. 
mozilla::SymbolTable and CompactSymbolTable +// have identical memory layout. +bool profiler_get_symbol_table(const char* debug_path, const char* breakpad_id, + mozilla::SymbolTable* symbol_table); + +bool profiler_demangle_rust(const char* mangled, char* buffer, size_t len); +} + +// For each running times value, call MACRO(index, name, unit, jsonProperty) +#define PROFILER_FOR_EACH_RUNNING_TIME(MACRO) \ + MACRO(0, ThreadCPU, Delta, threadCPUDelta) + +// This class contains all "running times" such as CPU usage measurements. +// All measurements are listed in `PROFILER_FOR_EACH_RUNNING_TIME` above. +// Each measurement is optional and only takes a value when explicitly set. +// Two RunningTimes object may be subtracted, to get the difference between +// known values. +class RunningTimes { + public: + constexpr RunningTimes() = default; + + // Constructor with only a timestamp, useful when no measurements will be + // taken. + constexpr explicit RunningTimes(const mozilla::TimeStamp& aTimeStamp) + : mPostMeasurementTimeStamp(aTimeStamp) {} + + constexpr void Clear() { *this = RunningTimes{}; } + + constexpr bool IsEmpty() const { return mKnownBits == 0; } + + // This should be called right after CPU measurements have been taken. + void SetPostMeasurementTimeStamp(const mozilla::TimeStamp& aTimeStamp) { + mPostMeasurementTimeStamp = aTimeStamp; + } + + const mozilla::TimeStamp& PostMeasurementTimeStamp() const { + return mPostMeasurementTimeStamp; + } + + // Should be filled for any registered thread. 
+ +#define RUNNING_TIME_MEMBER(index, name, unit, jsonProperty) \ + constexpr bool Is##name##unit##Known() const { \ + return (mKnownBits & mGot##name##unit) != 0; \ + } \ + \ + constexpr void Clear##name##unit() { \ + m##name##unit = 0; \ + mKnownBits &= ~mGot##name##unit; \ + } \ + \ + constexpr void Reset##name##unit(uint64_t a##name##unit) { \ + m##name##unit = a##name##unit; \ + mKnownBits |= mGot##name##unit; \ + } \ + \ + constexpr void Set##name##unit(uint64_t a##name##unit) { \ + MOZ_ASSERT(!Is##name##unit##Known(), #name #unit " already set"); \ + Reset##name##unit(a##name##unit); \ + } \ + \ + constexpr mozilla::Maybe Get##name##unit() const { \ + if (Is##name##unit##Known()) { \ + return mozilla::Some(m##name##unit); \ + } \ + return mozilla::Nothing{}; \ + } \ + \ + constexpr mozilla::Maybe GetJson##name##unit() const { \ + if (Is##name##unit##Known()) { \ + return mozilla::Some(ConvertRawToJson(m##name##unit)); \ + } \ + return mozilla::Nothing{}; \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_MEMBER) + +#undef RUNNING_TIME_MEMBER + + // Take values from another RunningTimes. + RunningTimes& TakeFrom(RunningTimes& aOther) { + if (!aOther.IsEmpty()) { +#define RUNNING_TIME_TAKE(index, name, unit, jsonProperty) \ + if (aOther.Is##name##unit##Known()) { \ + Set##name##unit(std::exchange(aOther.m##name##unit, 0)); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_TAKE) + +#undef RUNNING_TIME_TAKE + + aOther.mKnownBits = 0; + } + return *this; + } + + // Difference from `aBefore` to `this`. Any unknown makes the result unknown. + // PostMeasurementTimeStamp set to `this` PostMeasurementTimeStamp, to keep + // the most recent timestamp associated with the end of the interval over + // which the difference applies. 
+ RunningTimes operator-(const RunningTimes& aBefore) const { + RunningTimes diff; + diff.mPostMeasurementTimeStamp = mPostMeasurementTimeStamp; +#define RUNNING_TIME_SUB(index, name, unit, jsonProperty) \ + if (Is##name##unit##Known() && aBefore.Is##name##unit##Known()) { \ + diff.Set##name##unit(m##name##unit - aBefore.m##name##unit); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SUB) + +#undef RUNNING_TIME_SUB + return diff; + } + + private: + friend mozilla::ProfileBufferEntryWriter::Serializer; + friend mozilla::ProfileBufferEntryReader::Deserializer; + + // Platform-dependent. + static uint64_t ConvertRawToJson(uint64_t aRawValue); + + mozilla::TimeStamp mPostMeasurementTimeStamp; + + uint32_t mKnownBits = 0u; + +#define RUNNING_TIME_MEMBER(index, name, unit, jsonProperty) \ + static constexpr uint32_t mGot##name##unit = 1u << index; \ + uint64_t m##name##unit = 0; + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_MEMBER) + +#undef RUNNING_TIME_MEMBER +}; + +template <> +struct mozilla::ProfileBufferEntryWriter::Serializer { + static Length Bytes(const RunningTimes& aRunningTimes) { + Length bytes = 0; + +#define RUNNING_TIME_SERIALIZATION_BYTES(index, name, unit, jsonProperty) \ + if (aRunningTimes.Is##name##unit##Known()) { \ + bytes += ULEB128Size(aRunningTimes.m##name##unit); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SERIALIZATION_BYTES) + +#undef RUNNING_TIME_SERIALIZATION_BYTES + return ULEB128Size(aRunningTimes.mKnownBits) + bytes; + } + + static void Write(ProfileBufferEntryWriter& aEW, + const RunningTimes& aRunningTimes) { + aEW.WriteULEB128(aRunningTimes.mKnownBits); + +#define RUNNING_TIME_SERIALIZE(index, name, unit, jsonProperty) \ + if (aRunningTimes.Is##name##unit##Known()) { \ + aEW.WriteULEB128(aRunningTimes.m##name##unit); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SERIALIZE) + +#undef RUNNING_TIME_SERIALIZE + } +}; + +template <> +struct mozilla::ProfileBufferEntryReader::Deserializer { + static void 
ReadInto(ProfileBufferEntryReader& aER, + RunningTimes& aRunningTimes) { + aRunningTimes = Read(aER); + } + + static RunningTimes Read(ProfileBufferEntryReader& aER) { + // Start with empty running times, everything is cleared. + RunningTimes times; + + // This sets all the bits into mKnownBits, we don't need to modify it + // further. + times.mKnownBits = aER.ReadULEB128(); + + // For each member that should be known, read its value. +#define RUNNING_TIME_DESERIALIZE(index, name, unit, jsonProperty) \ + if (times.Is##name##unit##Known()) { \ + times.m##name##unit = aER.ReadULEB128(); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_DESERIALIZE) + +#undef RUNNING_TIME_DESERIALIZE + + return times; + } +}; + +#endif /* ndef TOOLS_PLATFORM_H_ */ diff --git a/tools/profiler/core/shared-libraries-linux.cc b/tools/profiler/core/shared-libraries-linux.cc new file mode 100644 index 0000000000..2991e64909 --- /dev/null +++ b/tools/profiler/core/shared-libraries-linux.cc @@ -0,0 +1,280 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "shared-libraries.h" + +#define PATH_MAX_TOSTRING(x) #x +#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x) +#include +#include +#include +#include +#include +#include +#include "platform.h" +#include "shared-libraries.h" +#include "GeckoProfiler.h" +#include "mozilla/Sprintf.h" +#include "mozilla/Unused.h" +#include "nsDebug.h" +#include "nsNativeCharsetUtils.h" +#include + +#include "common/linux/file_id.h" +#include +#include +#if defined(GP_OS_linux) || defined(GP_OS_android) +# include +#endif +#include + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include // dl_phdr_info +#else +# error "Unexpected configuration" +#endif + +#if defined(GP_OS_android) +extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr( + int (*callback)(struct dl_phdr_info* info, size_t size, void* data), + void* data); +#endif + +struct LoadedLibraryInfo { + LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress, + unsigned long aFirstMappingStart, + unsigned long aLastMappingEnd) + : mName(aName), + mBaseAddress(aBaseAddress), + mFirstMappingStart(aFirstMappingStart), + mLastMappingEnd(aLastMappingEnd) {} + + nsCString mName; + unsigned long mBaseAddress; + unsigned long mFirstMappingStart; + unsigned long mLastMappingEnd; +}; + +static nsCString IDtoUUIDString( + const google_breakpad::wasteful_vector& aIdentifier) { + using namespace google_breakpad; + + nsCString uuid; + const std::string str = FileID::ConvertIdentifierToUUIDString(aIdentifier); + uuid.Append(str.c_str(), str.size()); + // This is '0', not '\0', since it represents the breakpad id age. + uuid.Append('0'); + return uuid; +} + +// Return raw Build ID in hex. 
+static nsCString IDtoString( + const google_breakpad::wasteful_vector& aIdentifier) { + using namespace google_breakpad; + + nsCString uuid; + const std::string str = FileID::ConvertIdentifierToString(aIdentifier); + uuid.Append(str.c_str(), str.size()); + return uuid; +} + +// Get the breakpad Id for the binary file pointed by bin_name +static nsCString getBreakpadId(const char* bin_name) { + using namespace google_breakpad; + + PageAllocator allocator; + auto_wasteful_vector identifier(&allocator); + + FileID file_id(bin_name); + if (file_id.ElfFileIdentifier(identifier)) { + return IDtoUUIDString(identifier); + } + + return ""_ns; +} + +// Get the code Id for the binary file pointed by bin_name +static nsCString getCodeId(const char* bin_name) { + using namespace google_breakpad; + + PageAllocator allocator; + auto_wasteful_vector identifier(&allocator); + + FileID file_id(bin_name); + if (file_id.ElfFileIdentifier(identifier)) { + return IDtoString(identifier); + } + + return ""_ns; +} + +static SharedLibrary SharedLibraryAtPath(const char* path, + unsigned long libStart, + unsigned long libEnd, + unsigned long offset = 0) { + nsAutoString pathStr; + mozilla::Unused << NS_WARN_IF( + NS_FAILED(NS_CopyNativeToUnicode(nsDependentCString(path), pathStr))); + + nsAutoString nameStr = pathStr; + int32_t pos = nameStr.RFindChar('/'); + if (pos != kNotFound) { + nameStr.Cut(0, pos + 1); + } + + return SharedLibrary(libStart, libEnd, offset, getBreakpadId(path), + getCodeId(path), nameStr, pathStr, nameStr, pathStr, + ""_ns, ""); +} + +static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size, + void* data) { + auto libInfoList = reinterpret_cast*>(data); + + if (dl_info->dlpi_phnum <= 0) return 0; + + unsigned long baseAddress = dl_info->dlpi_addr; + unsigned long firstMappingStart = -1; + unsigned long lastMappingEnd = 0; + + for (size_t i = 0; i < dl_info->dlpi_phnum; i++) { + if (dl_info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + unsigned 
long start = dl_info->dlpi_addr + dl_info->dlpi_phdr[i].p_vaddr; + unsigned long end = start + dl_info->dlpi_phdr[i].p_memsz; + if (start < firstMappingStart) { + firstMappingStart = start; + } + if (end > lastMappingEnd) { + lastMappingEnd = end; + } + } + + libInfoList->AppendElement(LoadedLibraryInfo( + dl_info->dlpi_name, baseAddress, firstMappingStart, lastMappingEnd)); + + return 0; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibraryInfo info; + +#if defined(GP_OS_linux) + // We need to find the name of the executable (exeName, exeNameLen) and the + // address of its executable section (exeExeAddr) in the running image. + char exeName[PATH_MAX]; + memset(exeName, 0, sizeof(exeName)); + + ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1); + if (exeNameLen == -1) { + // readlink failed for whatever reason. Note this, but keep going. + exeName[0] = '\0'; + exeNameLen = 0; + LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed"); + } else { + // Assert no buffer overflow. + MOZ_RELEASE_ASSERT(exeNameLen >= 0 && + exeNameLen < static_cast<ssize_t>(sizeof(exeName))); + } + + unsigned long exeExeAddr = 0; +#endif + +#if defined(GP_OS_android) + // If dl_iterate_phdr doesn't exist, we give up immediately. + if (!dl_iterate_phdr) { + // On ARM Android, dl_iterate_phdr is provided by the custom linker. + // So if libxul was loaded by the system linker (e.g. as part of + // xpcshell when running tests), it won't be available and we should + // not call it. + return info; + } +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) + // Read info from /proc/self/maps. We ignore most of it. 
+ pid_t pid = profiler_current_process_id().ToNumber(); + char path[PATH_MAX]; + SprintfLiteral(path, "/proc/%d/maps", pid); + std::ifstream maps(path); + std::string line; + while (std::getline(maps, line)) { + int ret; + unsigned long start; + unsigned long end; + char perm[6 + 1] = ""; + unsigned long offset; + char modulePath[PATH_MAX + 1] = ""; + ret = sscanf(line.c_str(), + "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n", + &start, &end, perm, &offset, modulePath); + if (!strchr(perm, 'x')) { + // Ignore non executable entries + continue; + } + if (ret != 5 && ret != 4) { + LOG("SharedLibraryInfo::GetInfoForSelf(): " + "reading /proc/self/maps failed"); + continue; + } + +# if defined(GP_OS_linux) + // Try to establish the main executable's load address. + if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) { + exeExeAddr = start; + } +# elif defined(GP_OS_android) + // Use /proc/pid/maps to get the dalvik-jit section since it has no + // associated phdrs. + if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) { + info.AddSharedLibrary( + SharedLibraryAtPath(modulePath, start, end, offset)); + if (info.GetSize() > 10000) { + LOG("SharedLibraryInfo::GetInfoForSelf(): " + "implausibly large number of mappings acquired"); + break; + } + } +# endif + } +#endif + + // Filled in by dl_iterate_callback, one LoadedLibraryInfo per loaded object. + nsTArray<LoadedLibraryInfo> libInfoList; + + // We collect the bulk of the library info using dl_iterate_phdr. + dl_iterate_phdr(dl_iterate_callback, &libInfoList); + + for (const auto& libInfo : libInfoList) { + info.AddSharedLibrary( + SharedLibraryAtPath(libInfo.mName.get(), libInfo.mFirstMappingStart, + libInfo.mLastMappingEnd, + libInfo.mFirstMappingStart - libInfo.mBaseAddress)); + } + +#if defined(GP_OS_linux) + // Make another pass over the information we just harvested from + // dl_iterate_phdr. If we see a nameless object mapped at what we earlier + // established to be the main executable's load address, attach the + // executable's name to that entry. 
+ for (size_t i = 0; i < info.GetSize(); i++) { + SharedLibrary& lib = info.GetMutableEntry(i); + if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() && + lib.GetNativeDebugPath().empty()) { + lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(), + lib.GetOffset()); + + // We only expect to see one such entry. + break; + } + } +#endif + + return info; +} + +void SharedLibraryInfo::Initialize() { /* do nothing */ +} diff --git a/tools/profiler/core/shared-libraries-macos.cc b/tools/profiler/core/shared-libraries-macos.cc new file mode 100644 index 0000000000..415fda3633 --- /dev/null +++ b/tools/profiler/core/shared-libraries-macos.cc @@ -0,0 +1,211 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "shared-libraries.h" + +#include "ClearOnShutdown.h" +#include "mozilla/StaticMutex.h" +#include "mozilla/Unused.h" +#include "nsNativeCharsetUtils.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Architecture specific abstraction. 
+#if defined(GP_ARCH_x86) +typedef mach_header platform_mach_header; +typedef segment_command mach_segment_command_type; +# define MACHO_MAGIC_NUMBER MH_MAGIC +# define CMD_SEGMENT LC_SEGMENT +# define seg_size uint32_t +#else +typedef mach_header_64 platform_mach_header; +typedef segment_command_64 mach_segment_command_type; +# define MACHO_MAGIC_NUMBER MH_MAGIC_64 +# define CMD_SEGMENT LC_SEGMENT_64 +# define seg_size uint64_t +#endif + +// One loaded image: its Mach-O header address and the path dladdr reported. +struct NativeSharedLibrary { + const platform_mach_header* header; + std::string path; +}; +static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr; +static mozilla::StaticMutex sSharedLibrariesMutex MOZ_UNANNOTATED; + +// dyld add-image callback: appends the image's header and path to +// sSharedLibrariesList (no-op if dladdr fails or the list is not allocated). +static void SharedLibraryAddImage(const struct mach_header* mh, + intptr_t vmaddr_slide) { + // NOTE: Presumably for backwards-compatibility reasons, this function accepts + // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast + // it to the right type here. + auto header = reinterpret_cast<const platform_mach_header*>(mh); + + Dl_info info; + if (!dladdr(header, &info)) { + return; + } + + mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex); + if (!sSharedLibrariesList) { + return; + } + + NativeSharedLibrary lib = {header, info.dli_fname}; + sSharedLibrariesList->push_back(lib); +} + +// dyld remove-image callback: erases the first list entry whose header matches. +static void SharedLibraryRemoveImage(const struct mach_header* mh, + intptr_t vmaddr_slide) { + // NOTE: Presumably for backwards-compatibility reasons, this function accepts + // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast + // it to the right type here. 
+ auto header = reinterpret_cast<const platform_mach_header*>(mh); + + mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex); + if (!sSharedLibrariesList) { + return; + } + + uint32_t count = sSharedLibrariesList->size(); + for (uint32_t i = 0; i < count; ++i) { + if ((*sSharedLibrariesList)[i].header == header) { + sSharedLibrariesList->erase(sSharedLibrariesList->begin() + i); + return; + } + } +} + +void SharedLibraryInfo::Initialize() { + // NOTE: We intentionally leak this memory here. We're allocating dynamically + // in order to avoid static initializers. + sSharedLibrariesList = new std::vector<NativeSharedLibrary>(); + + _dyld_register_func_for_add_image(SharedLibraryAddImage); + _dyld_register_func_for_remove_image(SharedLibraryRemoveImage); +} + +// Builds a SharedLibrary for the Mach-O image at |header| (range taken from +// the __TEXT segment size, ids from LC_UUID) and appends it to |info|. +static void addSharedLibrary(const platform_mach_header* header, + const char* path, SharedLibraryInfo& info) { + const struct load_command* cmd = + reinterpret_cast<const struct load_command*>(header + 1); + + seg_size size = 0; + unsigned long long start = reinterpret_cast<unsigned long long>(header); + // Find the cmd segment in the macho image. It will contain the offset we care + // about. 
+ const uint8_t* uuid_bytes = nullptr; + for (unsigned int i = 0; + cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0); + ++i) { + if (cmd->cmd == CMD_SEGMENT) { + const mach_segment_command_type* seg = + reinterpret_cast<const mach_segment_command_type*>(cmd); + + if (!strcmp(seg->segname, "__TEXT")) { + size = seg->vmsize; + } + } else if (cmd->cmd == LC_UUID) { + const uuid_command* ucmd = reinterpret_cast<const uuid_command*>(cmd); + uuid_bytes = ucmd->uuid; + } + + // Load commands are variable-sized; step by cmdsize bytes to the next one. + cmd = reinterpret_cast<const struct load_command*>( + reinterpret_cast<const char*>(cmd) + cmd->cmdsize); + } + + nsAutoCString uuid; + nsAutoCString breakpadId; + if (uuid_bytes != nullptr) { + uuid.AppendPrintf( + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X", + uuid_bytes[0], uuid_bytes[1], uuid_bytes[2], uuid_bytes[3], + uuid_bytes[4], uuid_bytes[5], uuid_bytes[6], uuid_bytes[7], + uuid_bytes[8], uuid_bytes[9], uuid_bytes[10], uuid_bytes[11], + uuid_bytes[12], uuid_bytes[13], uuid_bytes[14], uuid_bytes[15]); + + // Breakpad id is the same as the uuid but with the additional trailing 0 + // for the breakpad id age. + breakpadId.AppendPrintf( + "%s" + "0" /* breakpad id age */, + uuid.get()); + } + + nsAutoString pathStr; + mozilla::Unused << NS_WARN_IF( + NS_FAILED(NS_CopyNativeToUnicode(nsDependentCString(path), pathStr))); + + nsAutoString nameStr = pathStr; + int32_t pos = nameStr.RFindChar('/'); + if (pos != kNotFound) { + nameStr.Cut(0, pos + 1); + } + + const NXArchInfo* archInfo = + NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype); + + info.AddSharedLibrary(SharedLibrary(start, start + size, 0, breakpadId, uuid, + nameStr, pathStr, nameStr, pathStr, ""_ns, + archInfo ? archInfo->name : "")); +} + +// Translate the statically stored sSharedLibrariesList information into a +// SharedLibraryInfo object. 
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex); + SharedLibraryInfo sharedLibraryInfo; + + // The list is only allocated by Initialize(); skip it when absent, matching + // the null checks in the dyld add/remove image callbacks. + if (sSharedLibrariesList) { + for (auto& info : *sSharedLibrariesList) { + addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo); + } + } + + // Add the entry for dyld itself. + // We only support macOS 10.12+, which corresponds to dyld version 15+. + // dyld version 15 added the dyldPath property. + task_dyld_info_data_t task_dyld_info; + mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; + if (task_info(mach_task_self(), TASK_DYLD_INFO, (task_info_t)&task_dyld_info, + &count) != KERN_SUCCESS) { + return sharedLibraryInfo; + } + + struct dyld_all_image_infos* aii = + (struct dyld_all_image_infos*)task_dyld_info.all_image_info_addr; + if (aii && aii->version >= 15) { + const platform_mach_header* header = + reinterpret_cast<const platform_mach_header*>( + aii->dyldImageLoadAddress); + addSharedLibrary(header, aii->dyldPath, sharedLibraryInfo); + } + + return sharedLibraryInfo; +} diff --git a/tools/profiler/core/shared-libraries-win32.cc b/tools/profiler/core/shared-libraries-win32.cc new file mode 100644 index 0000000000..cb0bcd1f41 --- /dev/null +++ b/tools/profiler/core/shared-libraries-win32.cc @@ -0,0 +1,167 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include + +#include "shared-libraries.h" +#include "nsWindowsHelpers.h" +#include "mozilla/NativeNt.h" +#include "mozilla/WindowsEnumProcessModules.h" +#include "mozilla/WindowsProcessMitigations.h" +#include "mozilla/WindowsVersion.h" +#include "nsPrintfCString.h" + +static bool IsModuleUnsafeToLoad(const nsAString& aModuleName) { +#if defined(_M_AMD64) || defined(_M_IX86) + // Hackaround for Bug 1607574. 
Nvidia's shim driver nvd3d9wrap[x].dll detours + // LoadLibraryExW and it causes AV when the following conditions are met. + // 1. LoadLibraryExW was called for "detoured.dll" + // 2. nvinit[x].dll was unloaded + // 3. OS version is older than 6.2 +# if defined(_M_AMD64) + LPCWSTR kNvidiaShimDriver = L"nvd3d9wrapx.dll"; + LPCWSTR kNvidiaInitDriver = L"nvinitx.dll"; +# elif defined(_M_IX86) + LPCWSTR kNvidiaShimDriver = L"nvd3d9wrap.dll"; + LPCWSTR kNvidiaInitDriver = L"nvinit.dll"; +# endif + if (aModuleName.LowerCaseEqualsLiteral("detoured.dll") && + !mozilla::IsWin8OrLater() && ::GetModuleHandleW(kNvidiaShimDriver) && + !::GetModuleHandleW(kNvidiaInitDriver)) { + return true; + } +#endif // defined(_M_AMD64) || defined(_M_IX86) + + // Hackaround for Bug 1723868. There is no safe way to prevent the module + // Microsoft's VP9 Video Decoder from being unloaded because mfplat.dll may + // have posted more than one task to unload the module in the work queue + // without calling LoadLibrary. + if (aModuleName.LowerCaseEqualsLiteral("msvp9dec_store.dll")) { + return true; + } + + return false; +} + +void AddSharedLibraryFromModuleInfo(SharedLibraryInfo& sharedLibraryInfo, + const wchar_t* aModulePath, + mozilla::Maybe<HMODULE> aModule) { + nsDependentSubstring moduleNameStr( + mozilla::nt::GetLeafName(nsDependentString(aModulePath))); + + // If the module is unsafe to call LoadLibraryEx for, we skip. + if (IsModuleUnsafeToLoad(moduleNameStr)) { + return; + } + + // If EAF+ is enabled, parsing ntdll's PE header causes a crash. + if (mozilla::IsEafPlusEnabled() && + moduleNameStr.LowerCaseEqualsLiteral("ntdll.dll")) { + return; + } + + // Load the module again - to make sure that its handle will remain valid as + // we attempt to read the PDB information from it - or for the first time if + // we only have a path. 
We want to load the DLL without running the newly + // loaded module's DllMain function, but not as a data file because we want + // to be able to do RVA computations easily. Hence, we use the flag + // LOAD_LIBRARY_AS_IMAGE_RESOURCE which ensures that the sections (not PE + // headers) will be relocated by the loader. Otherwise GetPdbInfo() and/or + // GetVersionInfo() can cause a crash. If the original handle |aModule| is + // valid, LoadLibraryEx just increments its refcount. + nsModuleHandle handleLock( + ::LoadLibraryExW(aModulePath, NULL, LOAD_LIBRARY_AS_IMAGE_RESOURCE)); + if (!handleLock) { + return; + } + + mozilla::nt::PEHeaders headers(handleLock.get()); + if (!headers) { + return; + } + + // Only the length of the image bounds is used below. + auto bounds = headers.GetBounds(); + if (!bounds) { + return; + } + + // Put the original |aModule| into SharedLibrary, but we get debug info + // from |handleLock| as |aModule| might be inaccessible. + const uintptr_t modStart = + aModule.isSome() ? reinterpret_cast<uintptr_t>(*aModule) + : reinterpret_cast<uintptr_t>(handleLock.get()); + const uintptr_t modEnd = modStart + bounds->length(); + + nsAutoCString breakpadId; + nsAutoString pdbPathStr; + if (const auto* debugInfo = headers.GetPdbInfo()) { + MOZ_ASSERT(breakpadId.IsEmpty()); + const GUID& pdbSig = debugInfo->pdbSignature; + breakpadId.AppendPrintf( + "%08lX" // m0 + "%04X%04X" // m1,m2 + "%02X%02X%02X%02X%02X%02X%02X%02X" // m3 + "%X", // pdbAge + pdbSig.Data1, pdbSig.Data2, pdbSig.Data3, pdbSig.Data4[0], + pdbSig.Data4[1], pdbSig.Data4[2], pdbSig.Data4[3], pdbSig.Data4[4], + pdbSig.Data4[5], pdbSig.Data4[6], pdbSig.Data4[7], debugInfo->pdbAge); + + // The PDB file name could be different from module filename, + // so report both + // e.g. 
The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb + pdbPathStr = NS_ConvertUTF8toUTF16(debugInfo->pdbFileName); + } + + nsAutoCString codeId; + DWORD timestamp; + DWORD imageSize; + if (headers.GetTimeStamp(timestamp) && headers.GetImageSize(imageSize)) { + codeId.AppendPrintf( + "%08lX" // Uppercase 8 digits of hex timestamp with leading zeroes. + "%lx", // Lowercase hex image size + timestamp, imageSize); + } + + nsAutoCString versionStr; + uint64_t version; + if (headers.GetVersionInfo(version)) { + // Four 16-bit fields, most significant first; cast to match "%u". + versionStr.AppendPrintf("%u.%u.%u.%u", + static_cast<unsigned int>((version >> 48) & 0xFFFFu), + static_cast<unsigned int>((version >> 32) & 0xFFFFu), + static_cast<unsigned int>((version >> 16) & 0xFFFFu), + static_cast<unsigned int>(version & 0xFFFFu)); + } + + const nsString& pdbNameStr = + PromiseFlatString(mozilla::nt::GetLeafName(pdbPathStr)); + SharedLibrary shlib(modStart, modEnd, + 0, // DLLs are always mapped at offset 0 on Windows + breakpadId, codeId, PromiseFlatString(moduleNameStr), + nsDependentString(aModulePath), pdbNameStr, pdbPathStr, + versionStr, ""); + sharedLibraryInfo.AddSharedLibrary(shlib); +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibraryInfo sharedLibraryInfo; + + auto addSharedLibraryFromModuleInfo = + [&sharedLibraryInfo](const wchar_t* aModulePath, HMODULE aModule) { + AddSharedLibraryFromModuleInfo(sharedLibraryInfo, aModulePath, + mozilla::Some(aModule)); + }; + + mozilla::EnumerateProcessModules(addSharedLibraryFromModuleInfo); + return sharedLibraryInfo; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoFromPath(const wchar_t* aPath) { + SharedLibraryInfo sharedLibraryInfo; + AddSharedLibraryFromModuleInfo(sharedLibraryInfo, aPath, mozilla::Nothing()); + return sharedLibraryInfo; +} + +void SharedLibraryInfo::Initialize() { /* do nothing */ +} diff --git a/tools/profiler/core/vtune/ittnotify.h b/tools/profiler/core/vtune/ittnotify.h new file mode 100644 index 0000000000..f1d65b3328 --- /dev/null +++ b/tools/profiler/core/vtune/ittnotify.h @@ -0,0 +1,4123 
@@ +/* + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + Contact Information: + http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/ + + BSD LICENSE + + Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _ITTNOTIFY_H_ +#define _ITTNOTIFY_H_ + +/** +@file +@brief Public User API functions and types +@mainpage + +The ITT API is used to annotate a user's program with additional information +that can be used by correctness and performance tools. The user inserts +calls in their program. Those calls generate information that is collected +at runtime, and used by Intel(R) Threading Tools. + +@section API Concepts +The following general concepts are used throughout the API. + +@subsection Unicode Support +Many API functions take character string arguments. On Windows, there +are two versions of each such function. The function name is suffixed +by W if Unicode support is enabled, and by A otherwise. Any API function +that takes a character string argument adheres to this convention. + +@subsection Conditional Compilation +Many users prefer having an option to modify ITT API code when linking it +inside their runtimes. ITT API header file provides a mechanism to replace +ITT API function names inside your code with empty strings. To do this, +define the macros INTEL_NO_ITTNOTIFY_API during compilation and remove the +static library from the linker script. 
+ +@subsection Domains +[see domains] +Domains provide a way to separate notification for different modules or +libraries in a program. Domains are specified by dotted character strings, +e.g. TBB.Internal.Control. + +A mechanism (to be specified) is provided to enable and disable +domains. By default, all domains are enabled. +@subsection Named Entities and Instances +Named entities (frames, regions, tasks, and markers) communicate +information about the program to the analysis tools. A named entity often +refers to a section of program code, or to some set of logical concepts +that the programmer wants to group together. + +Named entities relate to the programmer's static view of the program. When +the program actually executes, many instances of a given named entity +may be created. + +The API annotations denote instances of named entities. The actual +named entities are displayed using the analysis tools. In other words, +the named entities come into existence when instances are created. + +Instances of named entities may have instance identifiers (IDs). Some +API calls use instance identifiers to create relationships between +different instances of named entities. Other API calls associate data +with instances of named entities. + +Some named entities must always have instance IDs. In particular, regions +and frames always have IDs. Task and markers need IDs only if the ID is +needed in another API call (such as adding a relation or metadata). + +The lifetime of instance IDs is distinct from the lifetime of +instances. This allows various relationships to be specified separate +from the actual execution of instances. This flexibility comes at the +expense of extra API calls. + +The same ID may not be reused for different instances, unless a previous +[ref] __itt_id_destroy call for that ID has been issued. 
+*/ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS_FREEBSD +# define ITT_OS_FREEBSD 4 +#endif /* ITT_OS_FREEBSD */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# elif defined( __FreeBSD__ ) +# define ITT_OS ITT_OS_FREEBSD +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM_FREEBSD +# define ITT_PLATFORM_FREEBSD 4 +#endif /* ITT_PLATFORM_FREEBSD */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# elif ITT_OS==ITT_OS_FREEBSD +# define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include +#if defined(UNICODE) || defined(_UNICODE) +#include +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef ITTAPI_CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define ITTAPI_CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define ITTAPI_CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define ITTAPI_CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# 
endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* ITTAPI_CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. + */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# warning "Deprecated API is used. 
Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# include "vtune/legacy/ittnotify.h" +#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 
(void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup public Public API + * @{ + * @} + */ + +/** + * @defgroup control Collection Control + * @ingroup public + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . 
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . + * @{ + */ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); +/** @brief Detach collection */ +void ITTAPI __itt_detach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +ITT_STUBV(ITTAPI, void, detach, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#define __itt_detach ITTNOTIFY_VOID(detach) +#define __itt_detach_ptr ITTNOTIFY_NAME(detach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#define __itt_detach() +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} control group */ +/** @endcond */ + +/** + * @defgroup threads Threads + * @ingroup public + * Give names to threads + * @{ + */ +/** + * @brief Sets thread name of calling thread + * @param[in] name - name of thread + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_thread_set_nameA(const char *name); +void ITTAPI __itt_thread_set_nameW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thread_set_name __itt_thread_set_nameW +# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr +#else /* UNICODE */ +# define __itt_thread_set_name __itt_thread_set_nameA +# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr 
+#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_thread_set_name(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name)) +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) +#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) +#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) +#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) +#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA(name) +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW(name) +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name(name) +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @brief Mark current thread as ignored from this point on, for the duration of its 
existence. + */ +void ITTAPI __itt_thread_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, thread_ignore, (void)) +#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) +#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thread_ignore() +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} threads group */ + +/** + * @defgroup suppress Error suppression + * @ingroup public + * General behavior: application continues to run, but errors are suppressed + * + * @{ + */ + +/*****************************************************************//** + * @name group of functions used for error suppression in correctness tools + *********************************************************************/ +/** @{ */ +/** + * @hideinitializer + * @brief possible value for suppression mask + */ +#define __itt_suppress_all_errors 0x7fffffff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from threading analysis) + */ +#define __itt_suppress_threading_errors 0x000000ff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from memory analysis) + */ +#define __itt_suppress_memory_errors 0x0000ff00 + +/** + * @brief Start suppressing errors identified in mask on this thread + */ +void ITTAPI __itt_suppress_push(unsigned int mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) +#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) +#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_push(mask) +#define 
__itt_suppress_push_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effects of the matching call to __itt_suppress_push + */ +void ITTAPI __itt_suppress_pop(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_pop, (void)) +#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) +#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_pop() +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum __itt_suppress_mode { + __itt_unsuppress_range, + __itt_suppress_range +} __itt_suppress_mode_t; + +/** + * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask + */ +void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) +#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_mark_range(mask) +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effect of a matching call to __itt_suppress_mark_range. 
If no matching + * call is found, nothing is changed. + */ +void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) +#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The no-op stub takes the same four arguments as the real API so existing call sites still preprocess. */ +#define __itt_suppress_clear_range(mode, mask, address, size) +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ +/** @} suppress group */ + +/** + * @defgroup sync Synchronization + * @ingroup public + * Indicate user-written synchronization code + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** +@brief Name a synchronization object +@param[in] addr Handle for the synchronization object. You should +use a real address to uniquely identify the synchronization object. +@param[in] objtype null-terminated object type string. If NULL is +passed, the name will be "User Synchronization". +@param[in] objname null-terminated object name string. If NULL, +no name will be assigned to the object.
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_create __itt_sync_createW +# define __itt_sync_create_ptr __itt_sync_createW_ptr +#else /* UNICODE */ +# define __itt_sync_create __itt_sync_createA +# define __itt_sync_create_ptr __itt_sync_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) +#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) +#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) +#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create ITTNOTIFY_VOID(sync_create) +#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA(addr, objtype, objname, attribute) +#define 
__itt_sync_createA_ptr 0 +#define __itt_sync_createW(addr, objtype, objname, attribute) +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create(addr, objtype, objname, attribute) +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA_ptr 0 +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** +@brief Rename a synchronization object + +You can use the rename call to assign or reassign a name to a given +synchronization object. +@param[in] addr handle for the synchronization object. +@param[in] name null-terminated object name string. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_renameA(void *addr, const char *name); +void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_rename __itt_sync_renameW +# define __itt_sync_rename_ptr __itt_sync_renameW_ptr +#else /* UNICODE */ +# define __itt_sync_rename __itt_sync_renameA +# define __itt_sync_rename_ptr __itt_sync_renameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_rename(void *addr, const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN 
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) +#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) +#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) +#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) +#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA(addr, name) +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW(addr, name) +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename(addr, name) +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + @brief Destroy a synchronization object. + @param addr Handle for the synchronization object. 
+ */ +void ITTAPI __itt_sync_destroy(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr)) +#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy) +#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_destroy(addr) +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/*****************************************************************//** + * @name group of functions is used for performance measurement tools + *********************************************************************/ +/** @{ */ +/** + * @brief Enter spin loop on user-defined sync object + */ +void ITTAPI __itt_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr)) +#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare) +#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_prepare(addr) +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Quit spin loop without acquiring spin object + */ +void ITTAPI __itt_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr)) +#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel) +#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_cancel(addr) +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Successful spin loop completion (sync object acquired) + */ +void ITTAPI __itt_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr)) +#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired) +#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_acquired(addr) +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Start sync object releasing code. Is called before the lock release call. + */ +void ITTAPI __itt_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr)) +#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing) +#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_releasing(addr) +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** @} sync group */ + +/**************************************************************//** + * @name group of functions is used for correctness checking tools + ******************************************************************/ +/** @{ */ +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_prepare(void* addr); + */ +void ITTAPI __itt_fsync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr)) +#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare) +#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_prepare(addr) +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library.
+ * @see void __itt_sync_cancel(void *addr); + */ +void ITTAPI __itt_fsync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr)) +#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel) +#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_cancel(addr) +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_acquired(void *addr); + */ +void ITTAPI __itt_fsync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr)) +#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired) +#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_acquired(addr) +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_releasing(void* addr); + */ +void ITTAPI __itt_fsync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr)) +#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing) +#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_releasing(addr) +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** + * @defgroup model Modeling by Intel(R) Parallel Advisor + * @ingroup public + * This is the subset of itt used for modeling by Intel(R) Parallel Advisor. + * This API is called ONLY using annotate.h, by "Annotation" macros + * the user places in their sources during the parallelism modeling steps. + * + * site_begin/end and task_begin/end take the address of handle variables, + * which are writeable by the API. Handles must be 0 initialized prior + * to the first call to begin, or may cause a run-time failure. + * The handles are initialized in a multi-thread safe way by the API if + * the handle is 0. The commonly expected idiom is one static handle to + * identify a site or task. If a site or task of the same name has already + * been started during this collection, the same handle MAY be returned, + * but is not required to be - it is unspecified if data merging is done + * based on name. These routines also take an instance variable. 
Like + * the lexical instance, these must be 0 initialized. Unlike the lexical + * instance, this is used to track a single dynamic instance. + * + * API used by the Intel(R) Parallel Advisor to describe potential concurrency + * and related activities. User-added source annotations expand to calls + * to these procedures to enable modeling of a hypothetical concurrent + * execution serially. + * @{ + */ +#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL) + +typedef void* __itt_model_site; /*!< @brief handle for lexical site */ +typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */ +typedef void* __itt_model_task; /*!< @brief handle for lexical task */ +typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum { + __itt_model_disable_observation, + __itt_model_disable_collection +} __itt_model_disable; + +#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */ + +/** + * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support. + * + * site_begin/end model a potential concurrency site. + * site instances may be recursively nested with themselves. + * site_end exits the most recently started but unended site for the current + * thread. The handle passed to end may be used to validate structure. + * Instances of a site encountered on different threads concurrently + * are considered completely distinct. If the site name for two different + * lexical sites match, it is unspecified whether they are treated as the + * same or different for data presentation.
+ */ +void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_site_beginW(const wchar_t *name); +#endif +void ITTAPI __itt_model_site_beginA(const char *name); +void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); +void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); +void ITTAPI __itt_model_site_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) +ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) +#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) +#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) +#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) +#endif +#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) +#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) +#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) +#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) +#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) +#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) +#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) +#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_site_begin(site, instance, name) +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW(name) +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA(name) +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL(name, siteNameLen) +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end(site, instance) +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2() +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support + * + * task_begin/end model a potential task, which is contained within the most + * closely enclosing dynamic site. task_end exits the most recently started + * but unended task. The handle passed to end may be used to validate + * structure. It is unspecified if bad dynamic nesting is detected. If it + * is, it should be encoded in the resulting data collection. The collector + * should not fail due to construct nesting issues, nor attempt to directly + * indicate the problem. 
+ */ +void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_task_beginW(const wchar_t *name); +void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); +#endif +void ITTAPI __itt_model_task_beginA(const char *name); +void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_iteration_taskA(const char *name); +void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); +void ITTAPI __itt_model_task_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) +ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) +#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) +#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) +#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) +#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) +#define __itt_model_iteration_taskW_ptr 
ITTNOTIFY_NAME(model_iteration_taskW) +#endif +#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) +#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) +#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) +#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) +#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) +#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) +#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) +#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) +#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) +#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) +#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) +#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_task_begin(task, instance, name) +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW(name) +#define __itt_model_task_beginW_ptr 0 +/* Windows-only wide-character iteration task: no-op stub matching the enabled branch. */ +#define __itt_model_iteration_taskW(name) +#define __itt_model_iteration_taskW_ptr 0 +#endif +#define __itt_model_task_beginA(name) +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL(name, siteNameLen) +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA(name) +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL(name, siteNameLen) +#define __itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end(task, instance) +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2() +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW_ptr 0 +#define __itt_model_iteration_taskW_ptr 0 +#endif +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA_ptr 0 +#define
__itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support + * + * lock_acquire/release model a potential lock for both lockset and + * performance modeling. Each unique address is modeled as a separate + * lock, with invalid addresses being valid lock IDs. Specifically: + * no storage is accessed by the API at the specified address - it is only + * used for lock identification. Lock acquires may be self-nested and are + * unlocked by a corresponding number of releases. + * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, + * but may not have identical semantics.) + */ +void ITTAPI __itt_model_lock_acquire(void *lock); +void ITTAPI __itt_model_lock_acquire_2(void *lock); +void ITTAPI __itt_model_lock_release(void *lock); +void ITTAPI __itt_model_lock_release_2(void *lock); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) +#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) +#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) +#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) +#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) +#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) +#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) +#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) +#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_lock_acquire(lock) +#define 
__itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2(lock) +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release(lock) +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2(lock) +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support + * + * record_allocation/deallocation describe user-defined memory allocator + * behavior, which may be required for correctness modeling to understand + * when storage is not expected to be actually reused across threads. + */ +void ITTAPI __itt_model_record_allocation (void *addr, size_t size); +void ITTAPI __itt_model_record_deallocation(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) +#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) +#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) +#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) +#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_record_allocation(addr, size) +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation(addr) +#define __itt_model_record_deallocation_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_INDUCTION_USES support + * + * Note particular storage is inductive through the end of the current site + */ +void ITTAPI __itt_model_induction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) +#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) +#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_induction_uses(addr, size) +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_REDUCTION_USES support + * + * Note particular storage is used for reduction through the end + * of the current site + */ +void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) +#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) +#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_reduction_uses(addr, size) +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_OBSERVE_USES support + * + * Have correctness modeling record observations about uses of storage + * through the end of the current site + */ +void ITTAPI __itt_model_observe_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY 
+#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) +#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) +#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_observe_uses(addr, size) +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_CLEAR_USES support + * + * Clear the special handling of a piece of storage related to induction, + * reduction or observe_uses + */ +void ITTAPI __itt_model_clear_uses(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) +#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) +#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_clear_uses(addr) +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support + * + * disable_push/disable_pop push and pop disabling based on a parameter. + * Disabling observations stops processing of memory references during + * correctness modeling, and all annotations that occur in the disabled + * region. This allows description of code that is expected to be handled + * specially during conversion to parallelism or that is not recognized + * by tools (e.g. some kinds of synchronization operations.) + * This mechanism causes all annotations in the disabled region, other + * than disable_push and disable_pop, to be ignored. 
(For example, this + * might validly be used to disable an entire parallel site and the contained + * tasks and locking in it for data collection purposes.) + * The disable for collection is a more expensive operation, but reduces + * collector overhead significantly. This applies to BOTH correctness data + * collection and performance data collection. For example, a site + * containing a task might only enable data collection for the first 10 + * iterations. Both performance and correctness data should reflect this, + * and the program should run as close to full speed as possible when + * collection is disabled. + */ +void ITTAPI __itt_model_disable_push(__itt_model_disable x); +void ITTAPI __itt_model_disable_pop(void); +void ITTAPI __itt_model_aggregate_task(size_t x); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) +ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) +#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) +#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) +#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) +#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) +#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) +#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_disable_push(x) +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop() +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task(x) +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** 
@endcond */ +/** @} model group */ + +/** + * @defgroup heap Heap + * @ingroup public + * Heap group + * @{ + */ + +typedef void* __itt_heap_function; + +/** + * @brief Create an identification for heap function + * @return non-zero identifier or NULL + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); +__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_heap_function_create __itt_heap_function_createW +# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr +#else +# define __itt_heap_function_create __itt_heap_function_createA +# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) +#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) +#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) +#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create 
ITTNOTIFY_DATA(heap_function_create) +#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation begin occurrence. + */ +void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized)) +#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin) +#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_begin(h, size, initialized) +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation end occurrence. 
+ */ +void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized)) +#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end) +#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_end(h, addr, size, initialized) +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free begin occurrence. + */ +void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin) +#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_begin(h, addr) +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free end occurrence.
+ */ +void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end) +#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_end(h, addr) +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation begin occurrence. + */ +void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin) +#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_begin(h, addr, new_size, initialized) +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation end occurrence.
+ */ +void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end) +#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized) +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access begin */ +void ITTAPI __itt_heap_internal_access_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void)) +#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin) +#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_begin() +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access end */ +void ITTAPI __itt_heap_internal_access_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) +#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end) +#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end) +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_end() +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth begin */ +void ITTAPI __itt_heap_record_memory_growth_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) +#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) +#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_begin() +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth end */ +void ITTAPI __itt_heap_record_memory_growth_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) +#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) +#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_end() +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Specify the type of heap detection/reporting to modify. + */ +/** + * @hideinitializer + * @brief Report on memory leaks. 
+ */ +#define __itt_heap_leaks 0x00000001 + +/** + * @hideinitializer + * @brief Report on memory growth. + */ +#define __itt_heap_growth 0x00000002 + + +/** @brief heap reset detection */ +void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) +#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) +#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The stub takes the mask argument, like the real call, so call sites still compile when the API is disabled. */ +#define __itt_heap_reset_detection(reset_mask) +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief report */ +void ITTAPI __itt_heap_record(unsigned int record_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) +#define __itt_heap_record ITTNOTIFY_VOID(heap_record) +#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The stub takes the mask argument, like the real call, so call sites still compile when the API is disabled. */ +#define __itt_heap_record(record_mask) +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} heap group */ +/** @endcond */ +/* ========================================================================== */ + +/** + * @defgroup domains Domains + * @ingroup public + * Domains group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_domain +{ + volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ + const char* nameA; /*!< Copy of original name in ASCII.
*/ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_domain* next; +} __itt_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup domains + * @brief Create a domain. + * Create domain using some domain name: the URI naming style is recommended. + * Because the set of domains is expected to be static over the application's + * execution time, there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of + * which thread created the domain. This call is thread-safe. + * @param[in] name name of domain + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_domain* ITTAPI __itt_domain_createA(const char *name); +__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_domain_create __itt_domain_createW +# define __itt_domain_create_ptr __itt_domain_createW_ptr +#else /* UNICODE */ +# define __itt_domain_create __itt_domain_createA +# define __itt_domain_create_ptr __itt_domain_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_domain* ITTAPI __itt_domain_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) +#define 
__itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) +#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) +#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create ITTNOTIFY_DATA(domain_create) +#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA(name) (__itt_domain*)0 +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW(name) (__itt_domain*)0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create(name) (__itt_domain*)0 +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} domains group */ + +/** + * @defgroup ids IDs + * @ingroup public + * IDs group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_id +{ + unsigned long long d1, d2, d3; +} __itt_id; + +#pragma pack(pop) +/** @endcond */ + +const __itt_id __itt_null = { 0, 0, 0 }; + +/** + * @ingroup ids + * @brief A convenience function is provided to create an ID without domain control. + * @brief This is a convenience function to initialize an __itt_id structure. This function + * does not affect the collector runtime in any way. After you make the ID with this + * function, you still must create it with the __itt_id_create function before using the ID + * to identify a named entity. + * @param[in] addr The address of object; high QWORD of the ID value. 
+ * @param[in] extra The extra data to uniquely identify the object; low QWORD of the ID value. + */ + +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) +{ + __itt_id id = __itt_null; + id.d1 = (unsigned long long)((uintptr_t)addr); + id.d2 = (unsigned long long)extra; + id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ + return id; +} + +/** + * @ingroup ids + * @brief Create an instance of identifier. + * This establishes the beginning of the lifetime of an instance of + * the given ID in the trace. Once this lifetime starts, the ID + * can be used to tag named entity instances in calls such as + * __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * Instance IDs are not domain specific! + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) +#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create(domain,id) +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup ids + * @brief Destroy an instance of identifier. + * This ends the lifetime of the current instance of the given ID value in the trace. + * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) +#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_destroy(domain,id) +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} ids group */ + +/** + * @defgroup handles String Handles + * @ingroup public + * String Handles group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_string_handle +{ + const char* strA; /*!< Copy of original string in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* strW; /*!< Copy of original string in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* strW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_string_handle* next; +} __itt_string_handle; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup handles + * @brief Create a string handle. + * Create and return handle value that can be associated with a string. + * Consecutive calls to __itt_string_handle_create with the same name + * return the same value. Because the set of string handles is expected to remain + * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created + * the string handle. This call is thread-safe. + * @param[in] name The input string + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); +__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_string_handle_create __itt_string_handle_createW +# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr +#else /* UNICODE */ +# define __itt_string_handle_create __itt_string_handle_createA +# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) +#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) +#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) +#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) +#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA(name) (__itt_string_handle*)0 +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW(name) (__itt_string_handle*)0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create(name) (__itt_string_handle*)0 +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} handles group */ + +/** @cond exclude_from_documentation */ +typedef unsigned long long __itt_timestamp; +/** @endcond */ + +#define __itt_timestamp_none ((__itt_timestamp)-1LL) + +/** @cond exclude_from_gpa_documentation */ + +/** + * @ingroup timestamps + * @brief Return timestamp corresponding to the current moment. + * This returns the timestamp in the format that is the most relevant for the current + * host or platform (RDTSC, QPC, and others). You can use the "<" operator to + * compare __itt_timestamp values. 
+ */ +__itt_timestamp ITTAPI __itt_get_timestamp(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) +#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) +#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The stub yields a zero timestamp so the call stays usable as an expression when the API is disabled. */ +#define __itt_get_timestamp() ((__itt_timestamp)0) +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} timestamps */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @defgroup regions Regions + * @ingroup public + * Regions group + * @{ + */ +/** + * @ingroup regions + * @brief Begin of region instance. + * Successive calls to __itt_region_begin with the same ID are ignored + * until a call to __itt_region_end with the same ID + * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance. Must not be __itt_null + * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null + * @param[in] name The name of this region + */ +void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup regions + * @brief End of region instance. + * The first call to __itt_region_end with a given ID ends the + * region. Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance + */ +void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) +#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) +#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) +#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) +#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_region_begin(d,x,y,z) +#define __itt_region_begin_ptr 0 +#define __itt_region_end(d,x) +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_region_begin_ptr 0 +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} regions group */ + +/** + * @defgroup frames Frames + * @ingroup public + * Frames are similar to regions, but are intended to be easier to use and to implement. + * In particular: + * - Frames always represent periods of elapsed time + * - By default, frames have no nesting relationships + * @{ + */ + +/** + * @ingroup frames + * @brief Begin a frame instance. + * Successive calls to __itt_frame_begin with the + * same ID are ignored until a call to __itt_frame_end with the same ID. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + */ +void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief End a frame instance. + * The first call to __itt_frame_end with a given ID + * ends the frame. 
Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_frame_begin call. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL for current + */ +void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief Submits a frame instance. + * Successive calls to __itt_frame_begin or __itt_frame_submit with the + * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit + * with the same ID. + * Passing special __itt_timestamp_none value as "end" argument means + * take the current timestamp as the end timestamp. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + * @param[in] begin Timestamp of the beginning of the frame + * @param[in] end Timestamp of the end of the frame + */ +void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, + __itt_timestamp begin, __itt_timestamp end); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) +#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) +#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) +#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) +#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) +#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) +#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin_v3(domain,id) +#define __itt_frame_begin_v3_ptr 0 +#define 
__itt_frame_end_v3(domain,id) +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3(domain,id,begin,end) +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_v3_ptr 0 +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ +/** @endcond */ + +/** + * @defgroup taskgroup Task Group + * @ingroup public + * Task Group + * @{ + */ +/** + * @ingroup taskgroup + * @brief Denotes a task_group instance. + * Successive calls to __itt_task_group with the same ID are ignored. + * @param[in] domain The domain for this task_group instance + * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. + * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. + * @param[in] name The name of this task_group + */ +void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z) +#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_group(d,x,y,z) +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} taskgroup group */ + +/** + * @defgroup tasks Tasks + * @ingroup public + * A task instance represents a piece of work performed by a particular + * thread for a period of time. A call to __itt_task_begin creates a + * task instance. This becomes the current instance for that task on that + * thread.
A following call to __itt_task_end on the same thread ends the + * instance. There may be multiple simultaneous instances of tasks with the + * same name on different threads. If an ID is specified, the task instance + * receives that ID. Nested tasks are allowed. + * + * Note: The task is defined by the bracketing of __itt_task_begin and + * __itt_task_end on the same thread. If some scheduling mechanism causes + * task switching (the thread executes a different user task) or task + * switching (the user task switches to a different thread) then this breaks + * the notion of current instance. Additional API calls are required to + * deal with that possibility. + * @{ + */ + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The identifier for this task instance (may be 0) + * @param[in] parentid The parent of this task (may be 0) + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup tasks + * @brief End the current task instance. + * @param[in] domain The domain for this task + */ +void ITTAPI __itt_task_end(const __itt_domain *domain); + +/** + * @ingroup tasks + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. 
+ * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup tasks + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain)) +ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid)) +#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z) +#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin) +#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z) +#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn) +#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d) +#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end) +#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z) +#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped) +#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x) +#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin(domain,id,parentid,name) +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn(domain,id,parentid,fn) +#define __itt_task_begin_fn_ptr 0 
+#define __itt_task_end(domain) +#define __itt_task_end_ptr 0 +#define __itt_task_begin_overlapped(domain,taskid,parentid,name) +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_end_overlapped(domain,taskid) +#define __itt_task_end_overlapped_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn_ptr 0 +#define __itt_task_end_ptr 0 +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} tasks group */ + + +/** + * @defgroup markers Markers + * Markers represent a single discrete event in time. Markers have a scope, + * described by an enumerated type __itt_scope. Markers are created by + * the API call __itt_marker. A marker instance can be given an ID for use in + * adding metadata. + * @{ + */ + +/** + * @brief Describes the scope of an event object in the trace. + */ +typedef enum +{ + __itt_scope_unknown = 0, + __itt_scope_global, + __itt_scope_track_group, + __itt_scope_track, + __itt_scope_task, + __itt_scope_marker +} __itt_scope; + +/** @cond exclude_from_documentation */ +#define __itt_marker_scope_unknown __itt_scope_unknown +#define __itt_marker_scope_global __itt_scope_global +#define __itt_marker_scope_process __itt_scope_track_group +#define __itt_marker_scope_thread __itt_scope_track +#define __itt_marker_scope_task __itt_scope_task +/** @endcond */ + +/** + * @ingroup markers + * @brief Create a marker instance + * @param[in] domain The domain for this marker + * @param[in] id The instance ID for this marker or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker, (const
__itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z) +#define __itt_marker_ptr ITTNOTIFY_NAME(marker) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker(domain,id,name,scope) +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} markers group */ + +/** + * @defgroup metadata Metadata + * The metadata API is used to attach extra information to named + * entities. Metadata can be attached to an identified named entity by ID, + * or to the current entity (which is always a task). + * + * Conceptually metadata has a type (what kind of metadata), a key (the + * name of the metadata), and a value (the actual data). The encoding of + * the value depends on the type of the metadata. + * + * The type of metadata is specified by an enumerated type __itt_metadata_type. + * @{ + */ + +/** + * @ingroup parameters + * @brief describes the type of metadata + */ +typedef enum { + __itt_metadata_unknown = 0, + __itt_metadata_u64, /**< Unsigned 64-bit integer */ + __itt_metadata_s64, /**< Signed 64-bit integer */ + __itt_metadata_u32, /**< Unsigned 32-bit integer */ + __itt_metadata_s32, /**< Signed 32-bit integer */ + __itt_metadata_u16, /**< Unsigned 16-bit integer */ + __itt_metadata_s16, /**< Signed 16-bit integer */ + __itt_metadata_float, /**< Signed 32-bit floating-point */ + __itt_metadata_double /**< Signed 64-bit floating-point */ +} __itt_metadata_type; + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. + * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) +#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add(d,x,y,z,a,b) +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add __itt_metadata_str_addW +# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add __itt_metadata_str_addA +# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) 
ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) +#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) +#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) +#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) +#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW(d,x,y,z,a) +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. 
+ * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, 
metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} metadata group */ + +/** + * @defgroup relations Relations + * Instances of named entities can be explicitly associated with other + * instances using instance IDs and the relationship API calls. 
+ * + * @{ + */ + +/** + * @ingroup relations + * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. + * Relations between instances can be added with an API call. The relation + * API uses instance IDs. Relations can be added before or after the actual + * instances are created and persist independently of the instances. This + * is the motivation for having different lifetimes for instance IDs and + * the actual instances. + */ +typedef enum +{ + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ +} __itt_relation; + +/** + * @ingroup relations + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); + +/** + * @ingroup relations + * @brief Add a relation between two instance identifiers. 
+ * @param[in] domain The domain controlling this call + * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) +#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) +#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) +#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current(d,x,y) +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add(d,x,y,z) +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} relations group */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_info +{ + unsigned long long clock_freq; /*!< Clock domain frequency */ + unsigned long long clock_base; /*!< Clock domain base timestamp */ +} __itt_clock_info; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_domain +{ + __itt_clock_info 
info; /*!< Most recent clock domain info */ + __itt_get_clock_info_fn fn; /*!< Callback function pointer */ + void* fn_data; /*!< Input argument for the callback function */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_clock_domain* next; +} __itt_clock_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Create a clock domain. + * Certain applications require the capability to trace their application using + * a clock domain different than the CPU, for instance the instrumentation of events + * that occur on a GPU. + * Because the set of domains is expected to be static over the application's execution time, + * there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of which thread created + * the domain. This call is thread-safe. + * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps + * @param[in] fn_data Argument for a callback function; may be NULL + */ +__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) +#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) +#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Recalculate clock domains frequencies and clock base timestamps.
+ */ +void ITTAPI __itt_clock_domain_reset(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) +#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) +#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_reset() +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Create an instance of identifier. This establishes the beginning of the lifetime of + * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to + * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** + * @ingroup clockdomain + * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the + * given ID value in the trace. Any relationships that are established after this lifetime ends are + * invalid. This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) +#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) +#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) +#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create_ex(domain,clock_domain,timestamp,id) +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, or __itt_null + * @param[in] parentid The parent of this task, or __itt_null + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup clockdomain + * @brief End the current task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ */ +void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) +#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) +#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) +#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) +#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) +#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) +#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex(domain,clock_domain,timestamp) +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup counters Counters + * @ingroup public + * Counters are user-defined objects with a monotonically increasing + * value. Counter values are 64-bit unsigned integers. 
+ * Counters have names that can be displayed in + * the tools. + * @{ + */ + +/** + * @brief opaque structure for counter identification + */ +/** @cond exclude_from_documentation */ + +typedef struct ___itt_counter* __itt_counter; + +/** + * @brief Create an unsigned 64 bits integer counter with given name/domain + * + * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer + * + * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); +__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create __itt_counter_createW +# define __itt_counter_create_ptr __itt_counter_createW_ptr +#else /* UNICODE */ +# define __itt_counter_create __itt_counter_createA +# define __itt_counter_create_ptr __itt_counter_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) 
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) +#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) +#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create ITTNOTIFY_DATA(counter_create) +#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA(name, domain) +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW(name, domain) +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create(name, domain) +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) +#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) +#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc(id) +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Increment the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 
bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) +#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_delta(id, value) +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Decrement the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) +#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) +#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec(id) +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Decrement the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) +#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define 
__itt_counter_dec_delta(id, value) +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup counters + * @brief Increment a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls increment the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Increment a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to increment the counter + */ +void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) +#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) +#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) +#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_v3(domain,name) +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3(domain,name,delta) +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + +/** + * @ingroup counters + * @brief Decrement a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls decrement the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Decrement a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. 
+ * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + * @param[in] delta The amount by which to decrement the counter + */ +void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) +#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) +#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) +#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_v3(domain,name) +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3(domain,name,delta) +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} counters group */ + + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) +#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) +#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value(id, value_ptr) +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) +#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) +#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create a typed counter with given name/domain + * + * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); +__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create_typed __itt_counter_create_typedW +# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr +#else /* UNICODE */ +# define __itt_counter_create_typed __itt_counter_create_typedA +# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI 
__itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) +#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) +#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) +#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) +#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA(name, domain, type) +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW(name, domain, type) +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed(name, domain, type) +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or + * __itt_counter_create_typed() + */ +void ITTAPI __itt_counter_destroy(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) +#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) +#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_destroy(id) +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** + * @ingroup markers + * @brief Create a marker instance. + * @param[in] domain The domain for this marker + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The instance ID for this marker, or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) +#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); + +/** + * @ingroup clockdomain + * @brief Add a relation between two instance identifiers. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) +#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) +#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) +#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail) +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum ___itt_track_group_type +{ + __itt_track_group_type_normal = 0 +} __itt_track_group_type; +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track_group +{ + __itt_string_handle* name; /*!< Name of the track
group */ + struct ___itt_track* track; /*!< List of child tracks */ + __itt_track_group_type tgtype; /*!< Type of the track group */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track_group* next; +} __itt_track_group; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Placeholder for custom track types. Currently, "normal" custom track + * is the only available track type. + */ +typedef enum ___itt_track_type +{ + __itt_track_type_normal = 0 +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + , __itt_track_type_queue +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ +} __itt_track_type; + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track +{ + __itt_string_handle* name; /*!< Name of the track */ + __itt_track_group* group; /*!< Parent group to a track */ + __itt_track_type ttype; /*!< Type of the track */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track* next; +} __itt_track; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Create logical track group. + */ +__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) +#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) +#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0 +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create logical track.
+ */ +__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) +#define __itt_track_create ITTNOTIFY_DATA(track_create) +#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the logical track. + */ +void ITTAPI __itt_set_track(__itt_track* track); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) +#define __itt_set_track ITTNOTIFY_VOID(set_track) +#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_set_track(track) +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/* ========================================================================== */ +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup events Events + * @ingroup public + * Events group + * @{ + */ +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int 
namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. 
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} events group */ + + +/** + * @defgroup arrays Arrays Visualizer + * @ingroup public + * Visualize arrays + * @{ + */ + +/** + * @enum __itt_av_data_type + * @brief Defines types of arrays data (for C/C++ intrinsic types) + */ +typedef enum +{ + __itt_e_first = 0, + __itt_e_char = 0, /* 1-byte integer */ + __itt_e_uchar, /* 1-byte unsigned integer */ + __itt_e_int16, /* 2-byte integer */ + __itt_e_uint16, /* 2-byte unsigned integer */ + __itt_e_int32, /* 4-byte integer */ + __itt_e_uint32, /* 4-byte unsigned integer */ + __itt_e_int64, /* 8-byte integer */ + __itt_e_uint64, /* 8-byte unsigned integer */ + __itt_e_float, /* 4-byte floating */ + __itt_e_double, /* 8-byte floating */ + __itt_e_last = __itt_e_double +} __itt_av_data_type; + +/** + * @brief Save an array data to a file. + * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only). + * @param[in] data - pointer to the array data + * @param[in] rank - the rank of the array + * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. 
+ * The size of dimensions must be equal to the rank + * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) + * @param[in] filePath - the file path; the output format is defined by the file extension + * @param[in] columnOrder - defines how the array is stored in the linear memory. + * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C). + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_av_save __itt_av_saveW +# define __itt_av_save_ptr __itt_av_saveW_ptr +#else /* UNICODE */ +# define __itt_av_save __itt_av_saveA +# define __itt_av_save_ptr __itt_av_saveA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) +#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) +#define __itt_av_saveW 
ITTNOTIFY_DATA(av_saveW) +#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save ITTNOTIFY_DATA(av_save) +#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0 +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0 +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +void ITTAPI __itt_enable_attach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, enable_attach, (void)) +#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) +#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_enable_attach() +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** @} arrays group */ + +/** @endcond */ + +/** + * @brief Module load info + * This API is used to report necessary information in case of module relocation + * @param[in] start_addr - relocated module start address + * @param[in] end_addr - relocated module end address + * @param[in] path - file system path to the module + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr,
const char *path); +void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_module_load __itt_module_loadW +# define __itt_module_load_ptr __itt_module_loadW_ptr +#else /* UNICODE */ +# define __itt_module_load __itt_module_loadA +# define __itt_module_load_ptr __itt_module_loadA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) +ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) +#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) +#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) +#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load ITTNOTIFY_VOID(module_load) +#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA(start_addr, end_addr, path) +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW(start_addr, end_addr, path) +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load(start_addr, end_addr, path) +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* 
INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_H_ */ + +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + +#ifndef _ITTNOTIFY_PRIVATE_ +#define _ITTNOTIFY_PRIVATE_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @ingroup clockdomain + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. + * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) +#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) +#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) +#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) +#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup makrs_internal Marks + * @ingroup internal + * Marks group + * @warning Internal API: + * - It is not shipped to outside of Intel + * - It is delivered to internal Intel teams using e-mail or SVN access only + * @{ + */ +/** @brief user mark type */ +typedef int __itt_mark_type; + +/** + * @brief Creates a user mark type with the specified name using char or Unicode string. 
+ * @param[in] name - name of mark to create + * @return Returns a handle to the mark type + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_mark_type ITTAPI __itt_mark_createA(const char *name); +__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_create __itt_mark_createW +# define __itt_mark_create_ptr __itt_mark_createW_ptr +#else /* UNICODE */ +# define __itt_mark_create __itt_mark_createA +# define __itt_mark_create_ptr __itt_mark_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_mark_type ITTAPI __itt_mark_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA) +#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA) +#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW) +#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create ITTNOTIFY_DATA(mark_create) +#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA(name) (__itt_mark_type)0 +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW(name) (__itt_mark_type)0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create(name) (__itt_mark_type)0 +#define __itt_mark_create_ptr 0 +#endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string. + * + * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign. + * - The call is "synchronous" - function returns after mark is actually added to results. + * - This function is useful, for example, to mark different phases of application + * (beginning of the next mark automatically meand end of current region). + * - Can be used together with "continuous" marks (see below) at the same collection session + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @param[in] parameter - string parameter of mark + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark __itt_markW +# define __itt_mark_ptr __itt_markW_ptr +#else /* UNICODE */ +# define __itt_mark __itt_markA +# define __itt_mark_ptr __itt_markA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA ITTNOTIFY_DATA(markA) +#define __itt_markA_ptr ITTNOTIFY_NAME(markA) +#define __itt_markW ITTNOTIFY_DATA(markW) +#define __itt_markW_ptr ITTNOTIFY_NAME(markW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark ITTNOTIFY_DATA(mark) +#define __itt_mark_ptr ITTNOTIFY_NAME(mark) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA(mt, parameter) (int)0 +#define __itt_markA_ptr 0 +#define __itt_markW(mt, parameter) (int)0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark(mt, parameter) (int)0 +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA_ptr 0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create a "discrete" user event type (mark) for process + * rather then for one thread + * @see int __itt_mark(__itt_mark_type mt, const char* parameter); + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_global __itt_mark_globalW +# define __itt_mark_global_ptr __itt_mark_globalW_ptr +#else /* UNICODE */ +# define __itt_mark_global __itt_mark_globalA +# define __itt_mark_global_ptr __itt_mark_globalA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA) +#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) +#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) +#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global ITTNOTIFY_DATA(mark_global) +#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define 
__itt_mark_globalA(mt, parameter) (int)0 +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW(mt, parameter) (int)0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global(mt, parameter) (int)0 +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates an "end" point for "continuous" mark with specified name. + * + * - Returns zero value in case of success, non-zero value otherwise. + * Also returns non-zero value when preceding "begin" point for the + * mark with the same name failed to be created or not created. + * - The mark of "continuous" type is placed to collection results in + * case of success. It appears in overtime view(s) as a special tick + * sign (different from "discrete" mark) together with line from + * corresponding "begin" mark to "end" mark. + * @note Continuous marks can overlap and be nested inside each other. + * Discrete mark can be nested inside marked region + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +int ITTAPI __itt_mark_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) +#define __itt_mark_off ITTNOTIFY_DATA(mark_off) +#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_off(mt) (int)0 +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create an "end" point for mark of process + * @see int __itt_mark_off(__itt_mark_type mt); + */ +int ITTAPI __itt_mark_global_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) +#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) +#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_global_off(mt) (int)0 +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} marks group */ + +/** + * @defgroup counters_internal Counters + * @ingroup internal + * Counters group + * @{ + */ + + +/** + * @defgroup stitch Stack Stitching + * @ingroup internal + * Stack Stitching group + * @{ + */ +/** + * @brief opaque structure for counter identification + */ +typedef struct ___itt_caller *__itt_caller; + +/** + * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to. + * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. 
+ */ +__itt_caller ITTAPI __itt_stack_caller_create(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) +#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) +#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_create() (__itt_caller)0 +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the inforamtion about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + */ +void ITTAPI __itt_stack_caller_destroy(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) +#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) +#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_destroy(id) +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Sets the cut point. Stack from each event which occurs after this call will be cut + * at the same stack level the function was called and stitched to the corresponding stitch point. 
+ */ +void ITTAPI __itt_stack_callee_enter(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) +#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) +#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_enter(id) +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter(). + */ +void ITTAPI __itt_stack_callee_leave(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) +#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) +#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_leave(id) +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} stitch group */ + +/* ***************************************************************************************************************************** */ + +#include <stdarg.h> + +/** @cond exclude_from_documentation */ +typedef enum __itt_error_code +{ + __itt_error_success = 0, /*!< no error */ + __itt_error_no_module = 1, /*!< module can't be loaded */ + /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ + __itt_error_no_symbol = 2, /*!< symbol not found */ + /* %1$s -- library name, %2$s -- symbol name. 
*/ + __itt_error_unknown_group = 3, /*!< unknown group specified */ + /* %1$s -- env var name, %2$s -- group name. */ + __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ + /* %1$s -- env var name, %2$d -- system error. */ + __itt_error_env_too_long = 5, /*!< variable value too long */ + /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ + __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ + /* %1$s -- function name, %2$d -- errno. */ +} __itt_error_code; + +typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); +__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); + +const char* ITTAPI __itt_api_version(void); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) +void __itt_error_handler(__itt_error_code code, va_list args); +extern const int ITTNOTIFY_NAME(err); +#define __itt_err ITTNOTIFY_NAME(err) +ITT_STUB(ITTAPI, const char*, api_version, (void)) +#define __itt_api_version ITTNOTIFY_DATA(api_version) +#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_api_version() (const char*)0 +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_PRIVATE_ */ + +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ diff --git a/tools/profiler/docs/buffer.rst b/tools/profiler/docs/buffer.rst new file mode 100644 index 0000000000..dd7ef30dfd --- /dev/null +++ b/tools/profiler/docs/buffer.rst @@ -0,0 +1,70 @@ +Buffers and Memory Management +============================= + +In a post-Fission world, precise memory management across many threads and processes is 
+especially important. In order for the profiler to achieve this, it uses a chunked buffer +strategy. + +The `ProfileBuffer`_ is the overall buffer class that controls the memory and storage +for the profile, it allows allocating objects into it. This can be used freely +by things like markers and samples to store data as entries, without needing to know +about the general strategy for how the memory is managed. + +The `ProfileBuffer`_ is then backed by the `ProfileChunkedBuffer`_. This specialized +buffer grows incrementally, by allocating additional `ProfileBufferChunk`_ objects. +More and more chunks will be allocated until a memory limit is reached, where they will +be released. After releasing, the chunk will either be recycled or freed. + +The limiting of memory usage is coordinated by the `ProfilerParent`_ in the parent +process. The `ProfilerParent`_ and `ProfilerChild`_ exchange IPC messages with information +about how much memory is being used. When the maximum byte threshold is passed, +the ProfileChunkManager in the parent process removes the oldest chunk, and then the +`ProfilerParent`_ sends a `DestroyReleasedChunksAtOrBefore`_ message to all of the child +processes so that the oldest chunks in the profile are released. This helps long profiles +to keep having data in a similar time frame. + +Profile Buffer Terminology +########################## + +ProfilerParent + The main profiler machinery is installed in the parent process. It uses IPC to + communicate to the child processes. The PProfiler is the actor which is used + to communicate across processes to coordinate things. See `ProfilerParent.h`_. The + ProfilerParent uses the DestroyReleasedChunksAtOrBefore message to control the + overall chunk limit. + +ProfilerChild + ProfilerChild is installed in every child process, it will receive requests from + DestroyReleasedChunksAtOrBefore. + +Entry + This is an individual entry in the `ProfileBuffer.h`_. 
These entry sizes are not + related to the chunk sizes. An individual entry can straddle two different chunks. + An entry can contain various pieces of data, like markers, samples, and stacks. + +Chunk + An arbitrary sized chunk of memory, managed by the `ProfileChunkedBuffer`_, and + IPC calls from the ProfilerParent. + +Unreleased Chunk + This chunk is currently being used to write entries into. + +Released chunk + This chunk is full of data. When memory limits happen, it can either be recycled + or freed. + +Recycled chunk + This is a chunk that was previously written into, and full. When memory limits occur, + rather than freeing the memory, it is re-used as the next chunk. + +.. _ProfileChunkedBuffer: https://searchfox.org/mozilla-central/search?q=ProfileChunkedBuffer&path=&case=true&regexp=false +.. _ProfileChunkManager: https://searchfox.org/mozilla-central/search?q=ProfileBufferChunkManager.h&path=&case=true&regexp=false +.. _ProfileBufferChunk: https://searchfox.org/mozilla-central/search?q=ProfileBufferChunk&path=&case=true&regexp=false +.. _ProfileBufferChunkManagerWithLocalLimit: https://searchfox.org/mozilla-central/search?q=ProfileBufferChunkManagerWithLocalLimit&case=true&path= +.. _ProfilerParent.h: https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerParent.h +.. _ProfilerChild.h: https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerChild.h +.. _ProfileBuffer.h: https://searchfox.org/mozilla-central/source/tools/profiler/core/ProfileBuffer.h +.. _ProfileBuffer: https://searchfox.org/mozilla-central/search?q=ProfileBuffer&path=&case=true&regexp=false +.. _ProfilerParent: https://searchfox.org/mozilla-central/search?q=ProfilerParent&path=&case=true&regexp=false +.. _ProfilerChild: https://searchfox.org/mozilla-central/search?q=ProfilerChild&path=&case=true&regexp=false +.. 
_DestroyReleasedChunksAtOrBefore: https://searchfox.org/mozilla-central/search?q=DestroyReleasedChunksAtOrBefore&path=&case=true&regexp=false diff --git a/tools/profiler/docs/code-overview.rst b/tools/profiler/docs/code-overview.rst new file mode 100644 index 0000000000..3ca662e141 --- /dev/null +++ b/tools/profiler/docs/code-overview.rst @@ -0,0 +1,1494 @@ +Profiler Code Overview +###################### + +This is an overview of the code that implements the Profiler inside Firefox +with some details around tricky subjects, or pointers to more detailed +documentation and/or source code. + +It assumes familiarity with Firefox development, including Mercurial (hg), mach, +moz.build files, Try, Phabricator, etc. + +It also assumes knowledge of the user-visible part of the Firefox Profiler, that +is: How to use the Firefox Profiler, and what profiles contain that is shown +when capturing a profile. See the main website https://profiler.firefox.com, and +its `documentation `_. + +For just an "overview", it may look like a huge amount of information, but the +Profiler code is indeed quite expansive, so it takes a lot of words to explain +even just a high-level view of it! For on-the-spot needs, it should be possible +to search for some terms here and follow the clues. But for long-term +maintainers, it would be worth skimming this whole document to get a grasp of +the domain, and return to get some more detailed information before diving into +the code. + +WIP note: This document should be correct at the time it is written, but the +profiler code constantly evolves to respond to bugs or to provide new exciting +features, so this document could become obsolete in parts! It should still be +useful as an overview, but its correctness should be verified by looking at the +actual code. If you notice any significant discrepancy or broken links, please +help by +`filing a bug `_. 
+ +***** +Terms +***** + +This is the common usage for some frequently-used terms, as understood by the +Dev Tools team. But incorrect usage can sometimes happen, context is key! + +* **profiler** (a): Generic name for software that enables the profiling of + code. (`"Profiling" on Wikipedia `_) +* **Profiler** (the): All parts of the profiler code inside Firefox. +* **Base Profiler** (the): Parts of the Profiler that live in + mozglue/baseprofiler, and can be used from anywhere, but has limited + functionality. +* **Gecko Profiler** (the): Parts of the Profiler that live in tools/profiler, + and can only be used from other code in the XUL library. +* **Profilers** (the): Both the Base Profiler and the Gecko Profiler. +* **profiling session**: This is the time during which the profiler is running + and collecting data. +* **profile** (a): The output from a profiling session, either as a file, or a + shared viewable profile on https://profiler.firefox.com +* **Profiler back-end** (the): Other name for the Profiler code inside Firefox, + to distinguish it from... +* **Profiler front-end** (the): The website https://profiler.firefox.com that + displays profiles captured by the back-end. +* **Firefox Profiler** (the): The whole suite comprised of the back-end and front-end. + +****************** +Guiding Principles +****************** + +When working on the profiler, here are some guiding principles to keep in mind: + +* Low profiling overhead in cpu and memory. For the Profiler to provide the best + value, it should stay out of the way and consume as few resources (in time and + memory) as possible, so as not to skew the actual Firefox code too much. + +* Common data structures and code should be in the Base Profiler when possible. + + WIP note: Deduplication is slowly happening, see + `meta bug 1557566 `_. 
+ This document focuses on the Profiler back-end, and mainly the Gecko Profiler + (because this is where most of the code lives, the Base Profiler is mostly a + subset, originally just a cut-down version of the Gecko Profiler); so unless + specified, descriptions below are about the Gecko Profiler, but know that + there may be some equivalent code in the Base Profiler as well. + +* Use appropriate programming-language features where possible to reduce coding + errors in both our code, and our users' usage of it. In C++, this can be done + by using specific class/struct types for a given usage, to avoid misuse + (e.g., a generic integer representing a **process** could be incorrectly + given to a function expecting a **thread**; we have specific types for these + instead, more below.) + +* Follow the + `Coding Style `_. + +* Whenever possible, write tests (if not present already) for code you add or + modify -- but this may be too difficult in some cases, use good judgement and + at least test manually instead. + +****************** +Profiler Lifecycle +****************** + +Here is a high-level view of the Base **or** Gecko Profiler lifecycle, as part +of a Firefox run. The following sections will go into much more detail. + +* Profiler initialization, preparing some common data. +* Threads de/register themselves as they start and stop. +* During each User/test-controlled profiling session: + + * Profiler start, preparing data structures that will store the profiling data. + * Periodic sampling from a separate thread, happening at a user-selected + frequency (usually once every 1-2 ms), and recording snapshots of what + Firefox is doing: + + * CPU sampling, measuring how much time each thread has spent actually + running on the CPU. + * Stack sampling, capturing a stack of function calls from whichever leaf + function the program is in at this point in time, up to the top-most + caller (i.e., at least the ``main()`` function, or its callers if any). 
+ Note that unlike most external profilers, the Firefox Profiler back-end + is capable of getting more useful information than just native function + calls (compiled from C++ or Rust): + + * Labels added by Firefox developers along the stack, usually to identify + regions of code that perform "interesting" operations (like layout, file + I/Os, etc.). + * JavaScript function calls, including the level of optimization applied. + * Java function calls. + * At any time, Markers may record more specific details of what is happening, + e.g.: User operations, page rendering steps, garbage collection, etc. + * Optional profiler pause, which stops most recording, usually near the end of + a session so that no data gets recorded past this point. + * Profile JSON output, generated from all the recorded profiling data. + * Profiler stop, tearing down profiling session objects. +* Profiler shutdown. + +Note that the Base Profiler can start earlier, and then the data collected so +far, as well as the responsibility for periodic sampling, is handed over to the +Gecko Profiler: + +#. (Firefox starts) +#. Base Profiler init +#. Base Profiler start +#. (Firefox loads the libxul library and initializes XPCOM) +#. Gecko Profiler init +#. Gecko Profiler start +#. Handover from Base to Gecko +#. Base Profiler stop +#. (Bulk of the profiling session) +#. JSON generation +#. Gecko Profiler stop +#. Gecko Profiler shutdown +#. (Firefox ends XPCOM) +#. Base Profiler shutdown +#. (Firefox exits) + +Base Profiler functions that add data (mostly markers and labels) may be called +from anywhere, and will be recorded by either Profiler. The corresponding +functions in Gecko Profiler can only be called from other libxul code, and can +only be recorded by the Gecko Profiler. + +Whenever possible, Gecko Profiler functions should be preferred if accessible, +as they may provide extended functionality (e.g., better stacks with JS in +markers). Otherwise fall back on Base Profiler functions. 
+ +*********** +Directories +*********** + +* Non-Profiler supporting code + + * `mfbt `_ - Mostly + replacements for C++ std library facilities. + + * `mozglue/misc `_ + + * `PlatformMutex.h `_ - + Mutex base classes. + * `StackWalk.h `_ - + Stack-walking functions. + * `TimeStamp.h `_ - + Timestamps and time durations. + + * `xpcom `_ + + * `ds `_ - + Data structures like arrays, strings. + + * `threads `_ - + Threading functions. + +* Profiler back-end + + * `mozglue/baseprofiler `_ - + Base Profiler code, usable from anywhere in Firefox. Because it lives in + mozglue, it's loaded right at the beginning, so it's possible to start the + profiler very early, even before Firefox loads its big&heavy "xul" library. + + * `baseprofiler's public `_ - + Public headers, may be #included from anywhere. + * `baseprofiler's core `_ - + Main implementation code. + * `baseprofiler's lul `_ - + Special stack-walking code for Linux. + * `../tests/TestBaseProfiler.cpp `_ - + Unit tests. + + * `tools/profiler `_ - + Gecko Profiler code, only usable from the xul library. That library is + loaded a short time after Firefox starts, so the Gecko Profiler is not able + to profile the early phase of the application, Base Profiler handles that, + and can pass its collected data to the Gecko Profiler when the latter + starts. + + * `public `_ - + Public headers, may be #included from most libxul code. + * `core `_ - + Main implementation code. + * `gecko `_ - + Control from JS, and multi-process/IPC code. + * `lul `_ - + Special stack-walking code for Linux. + * `rust-api `_, + `rust-helper `_ + * `tests `_ + + * `devtools/client/performance-new `_, + `devtools/shared/performance-new `_ - + Middleware code for about:profiling and devtools panel functionality. + + * js, starting with + `js/src/vm/GeckoProfiler.h `_ - + JavaScript engine support, mostly to capture JS stacks. 
+ + * `toolkit/components/extensions/schemas/geckoProfiler.json `_ - + File that needs to be updated when Profiler features change. + +* Profiler front-end + + * Out of scope for this document, but its code and bug repository can be found at: + https://github.com/firefox-devtools/profiler . Sometimes work needs to be + done on both the back-end and the front-end, especially when modifying the + back-end's JSON output format. + +******* +Headers +******* + +The most central public header is +`GeckoProfiler.h `_, +from which almost everything else can be found, it can be a good starting point +for exploration. +It includes other headers, which together contain important top-level macros and +functions. + +WIP note: GeckoProfiler.h used to be the header that contained everything! +To better separate areas of functionality, and to hopefully reduce compilation +times, parts of it have been split into smaller headers, and this work will +continue, see `bug 1681416 `_. + +MOZ_GECKO_PROFILER and Macros +============================= + +Mozilla officially supports the Profiler on `tier-1 platforms +`_: +Windows, macOS, Linux and Android. +There is also some code running on tier 2-3 platforms (e.g., for FreeBSD), but +the team at Mozilla is not obligated to maintain it; we do try to keep it +running, and some external contributors are keeping an eye on it and provide +patches when things do break. + +To reduce the burden on unsupported platforms, a lot of the Profilers' code is +only compiled when ``MOZ_GECKO_PROFILER`` is #defined. This means that some +public functions may not always be declared or implemented, and should be +surrounded by guards like ``#ifdef MOZ_GECKO_PROFILER``. + +Some commonly-used functions offer an empty definition in the +non-``MOZ_GECKO_PROFILER`` case, so these functions may be called from anywhere +without a guard. + +Other functions have associated macros that can always be used, and resolve to +nothing on unsupported platforms.
E.g., +``PROFILER_REGISTER_THREAD`` calls ``profiler_register_thread`` where supported, +otherwise does nothing. + +WIP note: There is an effort to eventually get rid of ``MOZ_GECKO_PROFILER`` and +its associated macros, see +`bug 1635350 `_. + +RAII "Auto" macros and classes +============================== +A number of functions are intended to be called in pairs, usually to start and +then end some operation. To ease their use, and ensure that both functions are +always called together, they usually have an associated class and/or macro that +may be called only once. This pattern of using an object's destructor to ensure +that some action always eventually happens, is called +`RAII `_ in C++, with the +common prefix "auto". + +E.g.: In ``MOZ_GECKO_PROFILER`` builds, +`AUTO_PROFILER_INIT `_ +instantiates an +`AutoProfilerInit `_ +object, which calls ``profiler_init`` when constructed, and +``profiler_shutdown`` when destroyed. + +********************* +Platform Abstractions +********************* + +This section describes some platform abstractions that are used throughout the +Profilers. (Other platform abstractions will be described where they are used.) + +Process and Thread IDs +====================== + +The Profiler back-end often uses process and thread IDs (aka "pid" and "tid"), +which are commonly just a number. +For better code correctness, and to hide specific platform details, they are +encapsulated in opaque types +`BaseProfilerProcessId `_ +and +`BaseProfilerThreadId `_. +These types should be used wherever possible. +When interfacing with other code, they may be converted using the member +functions ``FromNumber`` and ``ToNumber``. + +To find the current process or thread ID, use +`profiler_current_process_id `_ +or +`profiler_current_thread_id `_. 
+ +The main thread ID is available through +`profiler_main_thread_id `_ +(assuming +`profiler_init_main_thread_id `_ +was called when the application started -- especially important in stand-alone +test programs.) +And +`profiler_is_main_thread `_ +is a quick way to find out if the current thread is the main thread. + +Locking +======= +The locking primitives in PlatformMutex.h are not supposed to be used as-is, but +through a user-accessible implementation. For the Profilers, this is in +`BaseProfilerDetail.h `_. + +In addition to the usual ``Lock``, ``TryLock``, and ``Unlock`` functions, +`BaseProfilerMutex `_ +objects have a name (which may be helpful when debugging), +they record the thread on which they are locked (making it possible to know if +the mutex is locked on the current thread), and in ``DEBUG`` builds there are +assertions verifying that the mutex is not incorrectly used recursively, to +verify the correct ordering of different Profiler mutexes, and that it is +unlocked before destruction. + +Mutexes should preferably be locked within C++ block scopes, or as class +members, by using +`BaseProfilerAutoLock `_. + +Some classes give the option to use a mutex or not (so that single-threaded code +can more efficiently bypass locking operations), for these we have +`BaseProfilerMaybeMutex `_ +and +`BaseProfilerMaybeAutoLock `_. + +There is also a special type of shared lock (aka RWLock, see +`RWLock on wikipedia `_), +which may be locked in multiple threads (through ``LockShared`` or preferably +`BaseProfilerAutoLockShared `_), +or locked exclusively, preventing any other locking (through ``LockExclusive`` or preferably +`BaseProfilerAutoLockExclusive `_). 
+ +********************* +Main Profiler Classes +********************* + +Diagram showing the most important Profiler classes, see details in the +following sections: + +(As noted, the "RegisteredThread" classes are now obsolete in the Gecko +Profiler, see the "Thread Registration" section below for an updated diagram and +description.) + +.. image:: profilerclasses-20220913.png + +*********************** +Profiler Initialization +*********************** + +`profiler_init `_ +and +`baseprofiler::profiler_init `_ +must be called from the main thread, and are used to prepare important aspects +of the profiler, including: + +* Making sure the main thread ID is recorded. +* Handling ``MOZ_PROFILER_HELP=1 ./mach run`` to display the command-line help. +* Creating the ``CorePS`` instance -- more details below. +* Registering the main thread. +* Initializing some platform-specific code. +* Handling other environment variables that are used to immediately start the + profiler, with optional settings provided in other env-vars. + +CorePS +====== + +The `CorePS class `_ +has a single instance that should live for the duration of the Firefox +application, and contains important information that could be needed even when +the Profiler is not running. + +It includes: + +* A static pointer to its single instance. +* The process start time. +* JavaScript-specific data structures. +* A list of registered + `PageInformations `_, + used to keep track of the tabs that this process handles. +* A list of + `BaseProfilerCounts `_, + used to record things like the process memory usage. +* The process name, and optionally the "eTLD+1" (roughly sub-domain) that this + process handles. +* In the Base Profiler only, a list of + `RegisteredThreads `_. + WIP note: This storage has been reworked in the Gecko Profiler (more below), + and in practice the Base Profiler only registers the main thread. This should + eventually disappear as part of the de-duplication work + (`bug 1557566 `_). 
+ +******************* +Thread Registration +******************* + +Threads need to register themselves in order to get fully profiled. +This section describes the main data structures that record the list of +registered threads and their data. + +WIP note: There is some work happening to add limited profiling of unregistered +threads, with the hope that more and more functionality could be added to +eventually use the same registration data structures. + +Diagram showing the relevant classes, see details in the following sub-sections: + +.. image:: profilerthreadregistration-20220913.png + +ProfilerThreadRegistry +====================== + +The +`static ProfilerThreadRegistry object `_ +contains a list of ``OffThreadRef`` objects. + +Each ``OffThreadRef`` points to a ``ProfilerThreadRegistration``, and restricts +access to a safe subset of the thread data, and forces a mutex lock if necessary +(more information under ProfilerThreadRegistrationData below). + +ProfilerThreadRegistration +========================== + +A +`ProfilerThreadRegistration object `_ +contains a lot of information relevant to its thread, to help with profiling it. + +This data is accessible from the thread itself through an ``OnThreadRef`` +object, which points to the ``ThreadRegistration``, and restricts access to a +safe subset of thread data, and forces a mutex lock if necessary (more +information under ProfilerThreadRegistrationData below). + +ThreadRegistrationData and accessors +==================================== + +`The ProfilerThreadRegistrationData.h header `_ +contains a hierarchy of classes that encapsulate all the thread-related data. + +``ThreadRegistrationData`` contains all the actual data members, including: + +* Some long-lived + `ThreadRegistrationInfo `_, + containing the thread name, its registration time, the thread ID, and whether + it's the main thread. +* A ``ProfilingStack`` that gathers developer-provided pseudo-frames, and JS + frames. 
+* Some platform-specific ``PlatformData`` (usually required to actually record + profiling measurements for that thread). +* A pointer to the top of the stack. +* A shared pointer to the thread's ``nsIThread``. +* A pointer to the ``JSContext``. +* An optional pre-allocated ``JsFrame`` buffer used during stack-sampling. +* Some JS flags. +* Sleep-related data (to avoid costly sampling while the thread is known to not + be doing anything). +* The current ``ThreadProfilingFeatures``, to know what kind of data to record. +* When profiling, a pointer to a ``ProfiledThreadData``, which contains some + more data needed during and just after profiling. + +As described in their respective code comments, each data member is supposed to +be accessed in certain ways, e.g., the ``JSContext`` should only be "written +from thread, read from thread and suspended thread". To enforce these rules, +data members can only be accessed through certain classes, which themselves can +only be instantiated in the correct conditions. + +The accessor classes are, from base to most-derived: + +* ``ThreadRegistrationData``, not an accessor itself, but it's the base class + with all the ``protected`` data. +* ``ThreadRegistrationUnlockedConstReader``, giving unlocked ``const`` access to + the ``ThreadRegistrationInfo``, ``PlatformData``, and stack top. +* ``ThreadRegistrationUnlockedConstReaderAndAtomicRW``, giving unlocked + access to the atomic data members: ``ProfilingStack``, sleep-related data, + ``ThreadProfilingFeatures``. +* ``ThreadRegistrationUnlockedRWForLockedProfiler``, giving access that's + protected by the Profiler's main lock, but doesn't require a + ``ThreadRegistration`` lock, to the ``ProfiledThreadData`` +* ``ThreadRegistrationUnlockedReaderAndAtomicRWOnThread``, giving unlocked + mutable access, but only on the thread itself, to the ``JSContext``. 
+* ``ThreadRegistrationLockedRWFromAnyThread``, giving locked access from any + thread to mutex-protected data: ``ThreadProfilingFeatures``, ``JsFrame``, + ``nsIThread``, and the JS flags. +* ``ThreadRegistrationLockedRWOnThread``, giving locked access, but only from + the thread itself, to the ``JSContext`` and a JS flag-related operation. +* ``ThreadRegistration::EmbeddedData``, containing all of the above, and stored + as a data member in each ``ThreadRegistration``. + +To recapitulate, if some code needs some data on the thread, it can use +``ThreadRegistration`` functions to request access (with the required rights, +like a mutex lock). +To access data about another thread, use similar functions from +``ThreadRegistry`` instead. +You may find some examples in the implementations of the functions in +ProfilerThreadState.h (see the following section). + +ProfilerThreadState.h functions +=============================== + +The +`ProfilerThreadState.h `_ +header provides a few helpful functions related to threads, including: + +* ``profiler_is_active_and_thread_is_registered`` +* ``profiler_thread_is_being_profiled`` (for the current thread or another + thread, and for a given set of features) +* ``profiler_thread_is_sleeping`` + +************** +Profiler Start +************** + +There are multiple ways to start the profiler, through command line env-vars, +and programmatically in C++ and JS. + +The main public C++ function is +`profiler_start `_. +It takes all the features specifications, and returns a promise that gets +resolved when the Profiler has fully started in all processes (multi-process +profiling is described later in this document, for now the focus will be on each +process running its instance of the Profiler). It first calls ``profiler_init`` +if needed, and also ``profiler_stop`` if the profiler was already running. + +The main implementation, which can be called from multiple sources, is +`locked_profiler_start `_. 
+It performs a number of operations to start the profiling session, including: + +* Record the session start time. +* Pre-allocate some work buffer to capture stacks for markers on the main thread. +* In the Gecko Profiler only: If the Base Profiler was running, take ownership + of the data collected so far, and stop the Base Profiler (we don't want both + trying to collect the same data at the same time!) +* Create the ActivePS, which keeps track of most of the profiling session + information, more about it below. +* For each registered thread found in the ``ThreadRegistry``, check if it's one + of the threads to profile, and if yes set the appropriate data into the + corresponding ``ThreadRegistrationData`` (including informing the JS engine to + start recording profiling data). +* On Android, start the Java sampler. +* If native allocations are to be profiled, setup the appropriate hooks. +* Start the audio callback tracing if requested. +* Set the public shared "active" state, used by many functions to quickly assess + whether to actually record profiling data. + +ActivePS +======== + +The `ActivePS class `_ +has a single instance at a time, that should live for the length of the +profiling session. + +It includes: + +* The session start time. +* A way to track "generations" (in case an old ActivePS still lives when the + next one starts, so that in-flight data goes to the correct place.) +* Requested features: Buffer capacity, periodic sampling interval, feature set, + list of threads to profile, optional: specific tab to profile. +* The profile data storage buffer and its chunk manager (see "Storage" section + below for details.) +* More data about live and dead profiled threads. +* Optional counters for per-process CPU usage, and power usage. +* A pointer to the ``SamplerThread`` object (see "Periodic Sampling" section + below for details.) 
+ +******* +Storage +******* + +During a session, the profiling data is serialized into a buffer, which is made +of "chunks", each of which contains "blocks", which have a size and the "entry" +data. + +During a profiling session, there is one main profile buffer, which may be +started by the Base Profiler, and then handed over to the Gecko Profiler when +the latter starts. + +The buffer is divided in chunks of equal size, which are allocated before they +are needed. When the data reaches a user-set limit, the oldest chunk is +recycled. This means that for long-enough profiling sessions, only the most +recent data (that could fit under the limit) is kept. + +Each chunk stores a sequence of blocks of variable length. The chunk itself +only knows where the first full block starts, and where the last block ends, +which is where the next block will be reserved. + +To add an entry to the buffer, a block is reserved, the size is written first +(so that readers can find the start of the next block), and then the entry bytes +are written. + +The following sections give more technical details. + +leb128iterator.h +================ + +`This utility header `_ +contains some functions to read and write unsigned "LEB128" numbers +(`LEB128 on wikipedia `_). + +They are an efficient way to serialize numbers that are usually small, e.g., +numbers up to 127 only take one byte, two bytes up to 16,383, etc. + +ProfileBufferBlockIndex +======================= + +`A ProfileBufferBlockIndex object `_ +encapsulates a block index that is known to be the valid start of a block. It is +created when a block is reserved, or when trusted code computes the start of a +block in a chunk. + +The more generic +`ProfileBufferIndex `_ +type is used when working inside blocks. + +ProfileBufferChunk +================== + +`A ProfileBufferChunk `_ +is a variable-sized object.
It contains: + +* A public copyable header, itself containing: + + * The local offset to the first full block (a chunk may start with the end of + a block that was started at the end of the previous chunk). That offset in + the very first chunk is the natural start to read all the data in the + buffer. + * The local offset past the last reserved block. This is where the next block + should be reserved, unless it points past the end of this chunk size. + * The timestamp when the chunk was first used. + * The timestamp when the chunk became full. + * The number of bytes that may be stored in this chunk. + * The number of reserved blocks. + * The global index where this chunk starts. + * The process ID writing into this chunk. + +* An owning unique pointer to the next chunk. It may be null for the last chunk + in a chain. + +* In ``DEBUG`` builds, a state variable, which is used to ensure that the chunk + goes through a known sequence of states (e.g., Created, then InUse, then + Done, etc.) See the sequence diagram + `where the member variable is defined `_. + +* The actual buffer data. + +Because a ProfileBufferChunk is variable-size, it must be created through its +static ``Create`` function, which takes care of allocating the correct amount +of bytes, at the correct alignment. + +Chunk Managers +============== + +ProfileBufferChunkManager +------------------------- + +`The ProfileBufferChunkManager abstract class `_ +defines the interface of classes that manage chunks. + +Concrete implementations are responsible for: + +* Creating chunks for their user, with a mechanism to pre-allocate chunks before they are actually needed. +* Taking back and owning chunks when they are "released" (usually when full). +* Automatically destroying or recycling the oldest released chunks. +* Giving temporary access to extant released chunks. + +ProfileBufferChunkManagerSingle +------------------------------- + +`A ProfileBufferChunkManagerSingle object `_ +manages a single chunk.
+ +That chunk is always the same, it is never destroyed. The user may use it and +optionally release it. The manager can then be reset, and that one chunk will +be available again for use. + +A request for a second chunk would always fail. + +This manager is short-lived and not thread-safe. It is useful when there is some +limited data that needs to be captured without blocking the global profiling +buffer, usually one stack sample. This data may then be extracted and quickly +added to the global buffer. + +ProfileBufferChunkManagerWithLocalLimit +--------------------------------------- + +`A ProfileBufferChunkManagerWithLocalLimit object `_ +implements the ``ProfileBufferChunkManager`` interface fully, managing a number +of chunks, and making sure their total combined size stays under a given limit. +This is the main chunk manager used during a profiling session. + +Note: It also implements the ``ProfileBufferControlledChunkManager`` interface, +this is explained in the later section "Multi-Process Profiling". + +It is thread-safe, and one instance is shared by both Profilers. + +ProfileChunkedBuffer +==================== + +`A ProfileChunkedBuffer object `_ +uses a ``ProfileBufferChunkManager`` to store data, and handles the different +C++ types of data that the Profilers want to read/write as entries in buffer +chunks. + +Its main function is ``ReserveAndPut``: + +* It takes an invocable object (like a lambda) that should return the size of + the entry to store, this is to potentially avoid costly operations just to + compute a size, when the profiler may not be running. +* It attempts to reserve the space in its chunks, requesting a new chunk if + necessary. +* It then calls a provided invocable object with a + `ProfileBufferEntryWriter `_, + which offers a range of functions to help serialize C++ objects.
The + de/serialization functions are found in specializations of + `ProfileBufferEntryWriter::Serializer `_ + and + `ProfileBufferEntryReader::Deserializer `_. + +More "put" functions use ``ReserveAndPut`` to more easily serialize blocks of +memory, or C++ objects. + +``ProfileChunkedBuffer`` is optionally thread-safe, using a +``BaseProfilerMaybeMutex``. + +WIP note: Using a mutex makes this storage too noisy for profiling some +real-time threads (like audio processing). +`Bug 1697953 `_ will look +at switching to using atomic variables instead. +An alternative would be to use totally separate non-thread-safe buffers for +each real-time thread that requires it (see +`bug 1754889 `_). + +ProfileBuffer +============= + +`A ProfileBuffer object `_ +uses a ``ProfileChunkedBuffer`` to store data, and handles the different kinds +of entries that the Profilers want to read/write. + +Each entry starts with a tag identifying a kind. These kinds can be found in +`ProfileBufferEntryKinds.h `_. + +There are "legacy" kinds, which are small fixed-length entries, such as: +Categories, labels, frame information, counters, etc. These can be stored in +`ProfileBufferEntry objects `_. + +And there are "modern" kinds, which have variable sizes, such as: Markers, CPU +running times, full stacks, etc. These are more directly handled by code that +can access the underlying ``ProfileChunkedBuffer``. + +The other major responsibility of a ``ProfileChunkedBuffer`` is to read back all +this data, sometimes during profiling (e.g., to duplicate a stack), but mainly +at the end of a session when generating the output JSON profile. + +***************** +Periodic Sampling +***************** + +Probably the most important job of the Profiler is to sample stacks of a number +of running threads, to help developers know which functions get used a lot when +performing some operation on Firefox. + +This is accomplished from a special thread, which regularly springs into action +and captures all this data.
+ +SamplerThread +============= + +`The SamplerThread object `_ +manages the information needed during sampling. It is created when the profiler +starts, and is stored inside the ``ActivePS``, see above for details. + +It includes: + +* A ``Sampler`` object that contains platform-specific details, which are + implemented in separate files like platform-win32.cpp, etc. +* The same generation index as its owning ``ActivePS``. +* The requested interval between samples. +* A handle to the thread where the sampling happens, its main function is + `Run() function `_. +* A list of callbacks to invoke after the next sampling. These may be used by + tests to wait for sampling to actually happen. +* The unregistered-thread-spy data, and an optional handle on another thread + that takes care of "spying" on unregistered threads (on platforms where that + operation is too expensive to run directly on the sampling thread). + +The ``Run()`` function takes care of performing the periodic sampling work: +(more details in the following sections) + +* Retrieve the sampling parameters. +* Instantiate a ``ProfileBuffer`` on the stack, to capture samples from other threads. +* Loop until a ``break``: + + * Lock the main profiler mutex, and do: + + * Check if sampling should stop, and break from the loop. + * Clean-up exit profiles (these are profiles sent from dying sub-processes, + and are kept for as long as they overlap with this process' own buffer range). + * Record the CPU utilization of the whole process. + * Record the power consumption. + * Sample each registered counter, including the memory counter. + * For each registered thread to be profiled: + + * Record the CPU utilization. + * If the thread is marked as "still sleeping", record a "same as before" + sample, otherwise suspend the thread and take a full stack sample. + * On some threads, record the event delay to compute the + (un)responsiveness. WIP note: This implementation may change.
+ + * Record profiling overhead durations. + + * Unlock the main profiler mutex. + * Invoke registered post-sampling callbacks. + * Spy on unregistered threads. + * Based on the requested sampling interval, and how much time this loop took, + compute when the next sampling loop should start, and make the thread sleep + for the appropriate amount of time. The goal is to be as regular as + possible, but if some/all loops take too much time, don't try too hard to + catch up, because the system is probably under stress already. + * Go back to the top of the loop. + +* If we're here, we hit a loop ``break`` above. +* Invoke registered post-sampling callbacks, to let them know that sampling + stopped. + +CPU Utilization +=============== + +CPU Utilization is stored as a number of milliseconds that a thread or process +has spent running on the CPU since the previous sampling. + +Implementations are platform-dependent, and can be found in +`the GetThreadRunningTimesDiff function `_ +and +`the GetProcessRunningTimesDiff function `_. + +Power Consumption +================= + +Energy probes added in 2022. + +Stacks +====== + +Stacks are the sequence of calls going from the entry point in the program +(generally ``main()`` and some OS-specific functions above), down to the +function where code is currently being executed. + +Native Frames +------------- + +Compiled code, from C++ and Rust source. + +Label Frames +------------ + +Pseudo-frames with arbitrary text, added from any language, mostly C++. + +JS, Wasm Frames +--------------- + +Frames corresponding to JavaScript functions. + +Java Frames +----------- + +Recorded by the JavaSampler. + +Stack Merging +------------- + +The above types of frames are all captured in different ways, and when finally +taking an actual stack sample (apart from Java), they get merged into one stack. + +All frames have an associated address in the call stack, and can therefore be +merged mostly by ordering them by this stack address. 
See +`MergeStacks `_ +for the implementation details. + +Counters +======== + +Counters are a special kind of probe, which can be continuously updated during +profiling, and the ``SamplerThread`` will sample their value at every loop. + +Memory Counter +-------------- + +This is the main counter. During a profiling session, hooks into the memory +manager keep track of each de/allocation, so at each sampling we know how many +operations were performed, and what is the current memory usage compared to the +previous sampling. + +Profiling Overhead +================== + +The ``SamplerThread`` records timestamps between parts of its sampling loop, and +records this as the sampling overhead. This may be useful to determine if the +profiler itself may have used too much of the computer resources, which could +skew the profile and give wrong impressions. + +Unregistered Thread Profiling +============================= + +At some intervals (not necessarily every sampling loop, depending on the OS), +the profiler may attempt to find unregistered threads, and record some +information about them. + +WIP note: This feature is experimental, and data is captured in markers on the +main thread. More work is needed to put this data in tracks like regular +registered threads, and capture more data like stack samples and markers. + +******* +Markers +******* + +Markers are events with a precise timestamp or time range, they have a name, a +category, options (out of a few choices), and optional marker-type-specific +payload data. + +Before describing the implementation, it is useful to be familiar with how +markers are natively added from C++, because this drives how the implementation +takes all this information and eventually outputs it in the final JSON profile. 
+ +Adding Markers from C++ +======================= + +See https://firefox-source-docs.mozilla.org/tools/profiler/markers-guide.html + +Implementation +============== + +The main function that records markers is +`profiler_add_marker `_. +It's a variadic templated function that takes the different expected +arguments, first checks if the marker should actually be recorded (the profiler +should be running, and the target thread should be profiled), and then calls +into the deeper implementation function ``AddMarkerToBuffer`` with a reference +to the main profiler buffer. + +`AddMarkerToBuffer `_ +takes the marker type as an object, removes it from the function parameter list, +and calls the next function with the marker type as an explicit template +parameter, and also a pointer to the function that can capture the stack +(because it is different between Base and Gecko Profilers, in particular the +latter one knows about JS). + +From here, we enter the land of +`BaseProfilerMarkersDetail.h `_, +which employs some heavy template techniques, in order to most efficiently +serialize the given marker payload arguments, in order to make them +deserializable when outputting the final JSON. In previous implementations, for +each new marker type, a new C++ class derived from a payload abstract class was +required, that had to implement all the constructors and virtual functions to: + +* Create the payload object. +* Serialize the payload into the profile buffer. +* Deserialize from the profile buffer to a new payload object. +* Convert the payload into the final output JSON. + +Now, the templated functions automatically take care of serializing all given +function call arguments directly (instead of storing them somewhere first), and +preparing a deserialization function that will recreate them on the stack and +directly call the user-provided JSONification function with these arguments.
+ +Continuing from the public ``AddMarkerToBuffer``, +`mozilla::base_profiler_markers_detail::AddMarkerToBuffer `_ +sets some defaults if not specified by the caller: Target the current thread, +use the current time. + +Then if a stack capture was requested, attempt to do it in +the most efficient way, using a pre-allocated buffer if possible. + +WIP note: This potential allocation should be avoided in time-critical threads. +There is already a buffer for the main thread (because it's the busiest thread), +but there could be more pre-allocated buffers, for specific real-time threads +that need them, or picked from a pool of pre-allocated buffers. See +`bug 1578792 `_. + +From there, `AddMarkerWithOptionalStackToBuffer `_ +handles ``NoPayload`` markers (usually added with ``PROFILER_MARKER_UNTYPED``) +in a special way, mostly to avoid the extra work associated with handling +payloads. Otherwise it continues with the following function. + +`MarkerTypeSerialization::Serialize `_ +retrieves the deserialization tag associated with the marker type. If it's the +first time this marker type is used, +`Streaming::TagForMarkerTypeFunctions `_ +adds it to the global list (which stores some function pointers used during +deserialization). + +Then the main serialization happens in +`StreamFunctionTypeHelper::Serialize `_. +Deconstructing this mouthful of a template: + +* ``MarkerType::StreamJSONMarkerData`` is the user-provided function that will + eventually produce the final JSON, but here it's only used to know the + parameter types that it expects. +* ``StreamFunctionTypeHelper`` takes that function prototype, and can extract + its arguments by specializing on ``R(SpliceableJSONWriter&, As...)``, now + ``As...`` is a parameter pack matching the function parameters. +* Note that ``Serialize`` also takes a parameter pack, which contains all the + referenced arguments given to the top ``AddMarkerToBuffer`` call.
These two + packs are supposed to match, at least the given arguments should be + convertible to the target pack parameter types. +* That specialization's ``Serialize`` function calls the buffer's ``PutObjects`` + variadic function to write all the marker data, that is: + + * The entry kind that must be at the beginning of every buffer entry, in this + case `ProfileBufferEntryKind::Marker `_. + * The common marker data (options first, name, category, deserialization tag). + * Then all the marker-type-specific arguments. Note that the C++ types + are those extracted from the deserialization function, so we know that + whatever is serialized here can be later deserialized using those same + types. + +The deserialization side is described in the later section "JSON output of +Markers". + +Adding Markers from Rust +======================== + +See https://firefox-source-docs.mozilla.org/tools/profiler/instrumenting-rust.html#adding-markers + +Adding Markers from JS +====================== + +See https://firefox-source-docs.mozilla.org/tools/profiler/instrumenting-javascript.html + +Adding Markers from Java +======================== + +See https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/geckoview/ProfilerController.java + +************* +Profiling Log +************* + +During a profiling session, some profiler-related events may be recorded using +`ProfilingLog::Access `_. + +The resulting JSON object is added near the end of the process' JSON generation, +in a top-level property named "profilingLog". This object is free-form, and is +not intended to be displayed, or even read by most people. But it may include +interesting information for advanced users, or could be an early temporary +prototyping ground for new features. + +See "profileGatheringLog" for another log related to late events. + +WIP note: This was introduced shortly before this documentation, so at this time +it doesn't do much at all. 
+ +*************** +Profile Capture +*************** + +Usually at the end of a profiling session, a profile is "captured", and either +saved to disk, or sent to the front-end https://profiler.firefox.com for +analysis. This section describes how the captured data is converted to the +Gecko Profiler JSON format. + +FailureLatch +============ + +`The FailureLatch interface `_ +is used during the JSON generation, in order to catch any unrecoverable error +(such as running Out Of Memory), to exit the process early, and to forward the +error to callers. + +There are two main implementations, suffixed "source" as they are the one source +of failure-handling, which is passed as ``FailureLatch&`` throughout the code: + +* `FailureLatchInfallibleSource `_ + is an "infallible" latch, meaning that it doesn't expect any failure. So if + a failure actually happened, the program would immediately terminate! (This + was the default behavior prior to introducing these latches.) +* `FailureLatchSource `_ + is a "fallible" latch, it will record the first failure that happens, and + "latch" into the failure state. The code should regularly examine this state, + and return early when possible. Eventually this failure state may be exposed + to end users. + +ProgressLogger, ProportionValue +=============================== + +`A ProgressLogger object `_ +is used to track the progress of a long operation, in this case the JSON +generation process. + +To match how the JSON generation code works (as a tree of C++ functions calls), +each ``ProgressLogger`` in a function usually records progress from 0 to 100% +locally inside that function. If that function calls a sub-function, it gives it +a sub-logger, which in the caller function is set to represent a local sub-range +(like 20% to 40%), but to the called function it will look like its own local +``ProgressLogger`` that goes from 0 to 100%. 
The very top ``ProgressLogger`` +converts the deepest local progress value to the corresponding global progress. + +Progress values are recorded in +`ProportionValue objects `_, +which effectively record fractional value with no loss of precision. + +This progress is most useful when the parent process is waiting for child +processes to do their work, to make sure progress does happen, otherwise to stop +waiting for frozen processes. More about that in the "Multi-Process Profiling" +section below. + +JSONWriter +========== + +`A JSONWriter object `_ +offers a simple way to create a JSON stream (start/end collections, add +elements, etc.), and calls back into a provided +`JSONWriteFunc interface `_ +to output characters. + +While these classes live outside of the Profiler directories, it may sometimes be +worth maintaining and/or modifying them to better serve the Profiler's needs. +But there are other users, so be careful not to break other things! + +SpliceableJSONWriter and SpliceableChunkedJSONWriter +==================================================== + +Because the Profiler deals with large amounts of data (big profiles can take +tens to hundreds of megabytes!), some specialized wrappers add better handling +of these large JSON streams. + +`SpliceableJSONWriter `_ +is a subclass of ``JSONWriter``, and allows the "splicing" of JSON strings, +i.e., being able to take a whole well-formed JSON string, and directly inserting +it as a JSON object in the target JSON being streamed. + +It also offers some functions that are often useful for the Profiler, such as: +* Converting a timestamp into a JSON object in the stream, taking care of keeping a nanosecond precision, without unwanted zeroes or nines at the end. +* Adding a number of null elements. +* Adding a unique string index, and add that string to a provided unique-string list if necessary. (More about UniqueStrings below.) + +`SpliceableChunkedJSONWriter `_ +is a subclass of ``SpliceableJSONWriter``. 
Its main attribute is that it provides its own writer +(`ChunkedJSONWriteFunc `_), +which stores the stream as a sequence of "chunks" (heap-allocated buffers). +It starts with a chunk of a default size, and writes incoming data into it, +later allocating more chunks as needed. This avoids having massive buffers being +resized all the time. + +It also offers the same splicing abilities as its parent class, but in case an +incoming JSON string comes from another ``SpliceableChunkedJSONWriter``, it's +able to just steal the chunks and add them to its list, thereby avoiding +expensive allocations and copies and destructions. + +UniqueStrings +============= + +Because a lot of strings would be repeated in profiles (e.g., frequent marker +names), such strings are stored in a separate JSON array of strings, and an +index into this list is used instead of that full string object. + +Note that these unique-string indices are currently only located in specific +spots in the JSON tree, they cannot be used just anywhere strings are accepted. + +`The UniqueJSONStrings class `_ +stores this list of unique strings in a ``SpliceableChunkedJSONWriter``. +Given a string, it takes care of storing it if encountered for the first time, +and inserts the index into a target ``SpliceableJSONWriter``. + +JSON Generation +=============== + +The "Gecko Profile Format" can be found at +https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md . + +The implementation in the back-end is +`locked_profiler_stream_json_for_this_process `_. +It outputs each JSON top-level JSON object, mostly in sequence. See the code for +how each object is output. Note that there is special handling for samples and +markers, as explained in the following section. + +ProcessStreamingContext and ThreadStreamingContext +-------------------------------------------------- + +In JSON profiles, samples and markers are separated by thread and by +samples/markers. 
Because there are potentially tens to a hundred threads, it +would be very costly to read the full profile buffer once for each of these +groups. So instead the buffer is read once, and all samples and markers are +handled as they are read, and their JSON output is sent to separate JSON +writers. + +`A ProcessStreamingContext object `_ +contains all the information to facilitate this output, including a list of +`ThreadStreamingContext's `_, +which each contain one ``SpliceableChunkedJSONWriter`` for the samples, and one +for the markers in this thread. + +When reading entries from the profile buffer, samples and markers are found by +their ``ProfileBufferEntryKind``, and as part of deserializing either kind (more +about each below), the thread ID is read, and determines which +``ThreadStreamingContext`` will receive the JSON output. + +At the end of this process, all ``SpliceableChunkedJSONWriters`` are efficiently +spliced (mainly a pointer move) into the final JSON output. + +JSON output of Samples +---------------------- + +This work is done in +`ProfileBuffer::DoStreamSamplesAndMarkersToJSON `_. + +From the main ``ProfileChunkedBuffer``, each entry is visited, its +``ProfileBufferEntryKind`` is read first, and for samples all frames from +captured stack are converted to the appropriate JSON. + +`A UniqueStacks object `_ +is used to de-duplicate frames and even sub-stacks: + +* Each unique frame string is written into a JSON array inside a + ``SpliceableChunkedJSONWriter``, and its index is the frame identifier. +* Each stack level is also de-duplicated, and identifies the associated frame + string, and points at the calling stack level (i.e., closer to the root). +* Finally, the identifier for the top of the stack is stored, along with a + timestamp (and potentially some more information) as the sample. + +For example, if we have collected the following samples: + +#. A -> B -> C +#. A -> B +#. 
A -> B -> D + +The frame table would contain each frame name, something like: +``["A", "B", "C", "D"]``. So the frame containing "A" has index 0, "B" is at 1, +etc. + +The stack table would contain each stack level, something like: +``[[0, null], [1, 0], [2, 1], [3, 1]]``. ``[0, null]`` means the frame is 0 +("A"), and it has no caller, it's the root frame. ``[1, 0]`` means the frame is +1 ("B"), and its caller is stack 0, which is just the previous one in this +example. + +And the three samples stored in the thread data would therefore be: 2, 1, 3 +(E.g.: "2" points in the stack table at the frame [2,1] with "C", and from there +down to "B", then "A"). + +All this contains all the information needed to reconstruct all full stack +samples. + +JSON output of Markers +---------------------- + +This also happens +`inside ProfileBuffer::DoStreamSamplesAndMarkersToJSON `_. + +When a ``ProfileBufferEntryKind::Marker`` is encountered, +`the DeserializeAfterKindAndStream function `_ +reads the ``MarkerOptions`` (stored as explained above), which include the +thread ID, identifying which ``ThreadStreamingContext``'s +``SpliceableChunkedJSONWriter`` to use. + +After that, the common marker data (timing, category, etc.) is output. + +Then the ``Streaming::DeserializerTag`` identifies which type of marker this is. +The special case of ``0`` (no payload) means nothing more is output. + +Otherwise some more common data is output as part of the payload if present, in +particular the "inner window id" (used to match markers with specific html +frames), and stack. + +WIP note: Some of these may move around in the future, see +`bug 1774326 `_, +`bug 1774328 `_, and +others. + +In case of a C++-written payload, the ``DeserializerTag`` identifies the +``MarkerDataDeserializer`` function to use.
This is part of the heavy templated +code in BaseProfilerMarkersDetail.h, the function is defined as +`MarkerTypeSerialization::Deserialize `_, +which outputs the marker type name, and then each marker payload argument. The +latter is done by using the user-defined ``MarkerType::StreamJSONMarkerData`` +parameter list, and recursively deserializing each parameter from the profile +buffer into an on-stack variable of a corresponding type, at the end of which +``MarkerType::StreamJSONMarkerData`` can be called with all of these arguments +as it expects, and that function does the actual JSON streaming as the user +programmed. + +************* +Profiler Stop +************* + +See "Profiler Start" and do the reverse! + +There is some special handling of the ``SamplerThread`` object, just to ensure +that it gets deleted outside of the main profiler mutex being locked, otherwise +this could result in a deadlock (because it needs to take the lock before being +able to check the state variable indicating that the sampling loop and thread +should end). + +***************** +Profiler Shutdown +***************** + +See "Profiler Initialization" and do the reverse! + +One additional action is handling the optional ``MOZ_PROFILER_SHUTDOWN`` +environment variable, to output a profile if the profiler was running. + +*********************** +Multi-Process Profiling +*********************** + +All of the above explanations focused on what the profiler is doing in each +process: Starting, running and collecting samples, markers, and more data, +outputting JSON profiles, and stopping. + +But Firefox is a multi-process program, since +`Electrolysis aka e10s `_ introduced child +processes to handle web content and extensions, and especially since +`Fission `_ forced even parts of the +same webpage to run in separate processes, mainly for added security. Since then +Firefox can spawn many processes, sometimes 10 to 20 when visiting busy sites.
+ +The following sections explain how profiling Firefox as a whole works. + +IPC (Inter-Process Communication) +================================= + +See https://firefox-source-docs.mozilla.org/ipc/. + +As a quick summary, some message-passing function-like declarations live in +`PProfiler.ipdl `_, +and corresponding ``SendX`` and ``RecvX`` C++ functions are respectively +generated in +`PProfilerParent.h `_, +and virtually declared (for user implementation) in +`PProfilerChild.h `_. + +During Profiling +================ + +Exit profiles +------------- + +One IPC message that is not in PProfiler.ipdl, is +`ShutdownProfile `_ +in +`PContent.ipdl `_. + +It's called from +`ContentChild::ShutdownInternal `_, +just before a child process ends, and if the profiler was running, to ensure +that the profile data is collected and sent to the parent, for storage in its +``ActivePS``. + +See +`ActivePS::AddExitProfile `_ +for details. Note that the current "buffer position at gathering time" (which is +effectively the largest ``ProfileBufferBlockIndex`` that is present in the +global profile buffer) is recorded. Later, +`ClearExpiredExitProfiles `_ +looks at the **smallest** ``ProfileBufferBlockIndex`` still present in the +buffer (because early chunks may have been discarded to limit memory usage), and +discards exit profiles that were recorded before, because their data is now +older than anything stored in the parent. + +Profile Buffer Global Memory Control +------------------------------------ + +Each process runs its own profiler, each with its own profile chunked buffer. To +keep the overall memory usage of all these buffers under the user-picked limit, +processes work together, with the parent process overseeing things. + +Diagram showing the relevant classes, see details in the following sub-sections: + +..
image:: fissionprofiler-20200424.png + +ProfileBufferControlledChunkManager +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +`The ProfileBufferControlledChunkManager interface `_ +allows a controller to get notified about all chunk updates, and to force the +destruction/recycling of old chunks. +`The ProfileBufferChunkManagerWithLocalLimit class `_ +implements it. + +`An Update object `_ +contains all information related to chunk changes: How much memory is currently +used by the local chunk manager, how much has been "released" (and therefore +could be destroyed/recycled), and a list of all chunks that were released since +the previous update; it also has a special state meaning that the child is +shutting down so there won't be updates anymore. An ``Update`` may be "folded" +into a previous one, to create a combined update equivalent to the two separate +ones one after the other. + +Update Handling in the ProfilerChild +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When the profiler starts in a child process, the ``ProfilerChild`` +`starts to listen for updates `_. + +These updates are stored and folded into previous ones (if any). At some point, +`an AwaitNextChunkManagerUpdate message `_ +will be received, and any update can be forwarded to the parent. The local +update is cleared, ready to store future updates. + +Update Handling in the ProfilerParent +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When the profiler starts AND when there are child processes, the +`ProfilerParent's ProfilerParentTracker `_ +creates +`a ProfileBufferGlobalController `_, +which starts to listen for updates from the local chunk manager. + +The ``ProfilerParentTracker`` is also responsible for keeping track of child +processes, and to regularly +`send them AwaitNextChunkManagerUpdate messages `_, +that the child's ``ProfilerChild`` answers to with updates. The update may +indicate that the child is shutting down, in which case the tracker will stop +tracking it. 
+ +All these updates (from the local chunk manager, and from child processes' own +chunk managers) are processed in +`ProfileBufferGlobalController::HandleChunkManagerNonFinalUpdate `_. +Based on this stream of updates, it is possible to calculate the total memory +used by all profile buffers in all processes, and to keep track of all chunks +that have been "released" (i.e., are full, and can be destroyed). When the total +memory usage reaches the user-selected limit, the controller can lookup the +oldest chunk, and get it destroyed (either a local call for parent chunks, or by +sending +`a DestroyReleasedChunksAtOrBefore message `_ +to the owning child). + +Historical note: Prior to Fission, the Profiler used to keep one fixed-size +circular buffer in each process, but as Fission made the possible number of +processes unlimited, the memory consumption grew too fast, and required the +implementation of the above system. But there may still be mentions of +"circular buffers" in the code or documents; these have effectively been +replaced by chunked buffers, with centralized chunk control. + +Gathering Child Profiles +======================== + +When it's time to capture a full profile, the parent process performs its own +JSON generation (as described above), and sends +`a GatherProfile message `_ +to all child processes, which will make them generate their JSON profile and +send it back to the parent. + +All child profiles, including the exit profiles collected during profiling, are +stored as elements of a top-level array with property name "processes". + +During the gathering phase, while the parent is waiting for child responses, it +regularly sends +`GetGatherProfileProgress messages `_ +to all child processes that have not sent their profile yet, and the parent +expects responses within a short timeframe. The response carries a progress +value. 
If at some point two messages went by with no progress made anywhere +(either there was no response, or the progress value didn't change), the parent +assumes that remaining child processes may be frozen indefinitely, stops the +gathering and considers the JSON generation complete. + +During all of the above work, events are logged (especially issues with child +processes), and are added at the end of the JSON profile, in a top-level object +with property name "profileGatheringLog". This object is free-form, and is not +intended to be displayed, or even read by most people. But it may include +interesting information for advanced users regarding the profile-gathering +phase. diff --git a/tools/profiler/docs/fissionprofiler-20200424.png b/tools/profiler/docs/fissionprofiler-20200424.png new file mode 100644 index 0000000000..1602877a5b Binary files /dev/null and b/tools/profiler/docs/fissionprofiler-20200424.png differ diff --git a/tools/profiler/docs/fissionprofiler.umlet.uxf b/tools/profiler/docs/fissionprofiler.umlet.uxf new file mode 100644 index 0000000000..3325294e25 --- /dev/null +++ b/tools/profiler/docs/fissionprofiler.umlet.uxf @@ -0,0 +1,546 @@ + + + 10 + + UMLClass + + 70 + 110 + 300 + 70 + + /PProfilerParent/ +bg=light_gray +-- +*+SendAwaitNextChunkManagerUpdate()* +*+SendDestroyReleasedChunksAtOrBefore()* + + + + UMLClass + + 470 + 20 + 210 + 70 + + *ProfileBufferChunkMetadata* +bg=light_gray +-- ++doneTimeStamp ++bufferBytes + + + + + UMLClass + + 780 + 110 + 330 + 70 + + /PProfilerChild/ +bg=light_gray +-- +*/+RecvAwaitNextChunkManagerUpdate() = 0/* +*/+RecvDestroyReleasedChunksAtOrBefore() = 0/* + + + + + UMLClass + + 110 + 260 + 220 + 70 + + ProfilerParent +-- +*-processId* +-- + + + + + Relation + + 210 + 170 + 30 + 110 + + lt=<<- + 10.0;10.0;10.0;90.0 + + + UMLClass + + 740 + 250 + 410 + 90 + + ProfilerChild +-- +-UpdateStorage: unreleased bytes, released: {pid, rangeStart[ ]} +-- +*+RecvAwaitNextChunkUpdate()*
+*+RecvDestroyReleasedChunksAtOrBefore()* + + + + + Relation + + 930 + 170 + 30 + 100 + + lt=<<- + 10.0;10.0;10.0;80.0 + + + UMLClass + + 110 + 400 + 220 + 70 + + ProfilerParentTracker +-- +_+Enumerate()_ +_*+ForChild()*_ + + + + Relation + + 210 + 320 + 190 + 100 + + lt=<- +m1=0..n +nsTArray<ProfilerParent*> + 10.0;10.0;10.0;80.0 + + + UMLClass + + 80 + 1070 + 150 + 30 + + ProfileBufferChunk + + + + UMLClass + + 380 + 1070 + 210 + 30 + + /ProfileBufferChunkManager/ + + + + UMLClass + + 180 + 900 + 700 + 50 + + ProfileBufferChunkManagerWithLocalLimit +-- +-mUpdateCallback + + + + Relation + + 480 + 940 + 30 + 150 + + lt=<<- + 10.0;130.0;10.0;10.0 + + + UMLClass + + 380 + 1200 + 210 + 30 + + ProfileChunkedBuffer + + + + Relation + + 410 + 1090 + 140 + 130 + + lt=->>>> +mChunkManager + 10.0;10.0;10.0;110.0 + + + UMLClass + + 960 + 1200 + 100 + 30 + + CorePS + + + + UMLClass + + 960 + 1040 + 100 + 30 + + ActivePS + + + + Relation + + 580 + 1200 + 400 + 40 + + lt=->>>>> +mCoreBuffer + 10.0;20.0;380.0;20.0 + + + Relation + + 870 + 940 + 250 + 120 + + lt=->>>>> +mProfileBufferChunkManager + 10.0;10.0;90.0;100.0 + + + UMLClass + + 830 + 1140 + 100 + 30 + + ProfileBuffer + + + + Relation + + 920 + 1060 + 130 + 110 + + lt=->>>>> +mProfileBuffer + 10.0;90.0;40.0;10.0 + + + Relation + + 580 + 1160 + 270 + 70 + + lt=->>>> +mEntries + 10.0;50.0;250.0;10.0 + + + Relation + + 90 + 1090 + 310 + 150 + + lt=->>>>> +m1=0..1 +mCurrentChunk: UniquePtr<> + 10.0;10.0;10.0;130.0;290.0;130.0 + + + Relation + + 210 + 1080 + 200 + 150 + + lt=->>>>> +m1=0..N +mNextChunks: UniquePtr<> + 20.0;10.0;170.0;130.0 + + + Relation + + 200 + 940 + 230 + 150 + + lt=->>>>> +m1=0..N +mReleasedChunks: UniquePtr<> + 10.0;130.0;10.0;10.0 + + + Relation + + 530 + 1090 + 270 + 130 + + lt=->>>>> +mOwnedChunkManager: UniquePtr<> + 10.0;10.0;10.0;110.0 + + + UMLClass + + 480 + 390 + 550 + 150 + + *ProfileBufferGlobalController* +-- +-mMaximumBytes +-mCurrentUnreleasedBytesTotal +-mCurrentUnreleasedBytes: {pid, 
unreleased bytes}[ ] sorted by pid +-mCurrentReleasedBytes +-mReleasedChunks: {doneTimeStamp, bytes, pid}[ ] sorted by timestamp +-mDestructionCallback: function<void(pid, rangeStart)> +-- ++Update(pid, unreleased bytes, released: ProfileBufferChunkMetadata[ ]) + + + + Relation + + 320 + 420 + 180 + 40 + + lt=->>>>> +mController + 160.0;20.0;10.0;20.0 + + + Relation + + 20 + 400 + 110 + 80 + + lt=->>>>> +_sInstance_ + 90.0;60.0;10.0;60.0;10.0;10.0;90.0;10.0 + + + UMLNote + + 480 + 250 + 220 + 120 + + The controller is only needed +if there *are* child processes, +so we can create it with the first +child (at which point the tracker +can register itself with the local +profiler), and destroyed with the +last child. +bg=blue + + + + Relation + + 690 + 330 + 100 + 80 + + + 10.0;10.0;80.0;60.0 + + + Relation + + 130 + 460 + 200 + 380 + + lt=->>>> +mParentChunkManager + 180.0;360.0;10.0;360.0;10.0;10.0 + + + Relation + + 740 + 330 + 350 + 510 + + lt=->>>> +mLocalBufferChunkManager + 10.0;490.0;330.0;490.0;330.0;10.0 + + + UMLClass + + 470 + 650 + 400 + 100 + + *ProfileBufferControlledChunkManager::Update* +-- +-mUnreleasedBytes +-mReleasedBytes +-mOldestDoneTimeStamp +-mNewReleasedChunks: ChunkMetadata[ ] + + + + UMLClass + + 470 + 560 + 400 + 60 + + *ProfileBufferControlledChunkManager::ChunkMetadata* +-- +-mDoneTimeStamp +-mBufferBytes + + + + Relation + + 670 + 610 + 30 + 60 + + lt=<. + 10.0;10.0;10.0;40.0 + + + Relation + + 670 + 740 + 30 + 60 + + lt=<. + 10.0;10.0;10.0;40.0 + + + Relation + + 670 + 50 + 130 + 110 + + lt=<. + 10.0;10.0;110.0;90.0 + + + Relation + + 360 + 50 + 130 + 110 + + lt=<. 
+ 110.0;10.0;10.0;90.0 + + + UMLClass + + 400 + 130 + 350 + 100 + + *ProfileBufferChunkManagerUpdate* +bg=light_gray +-- +-unreleasedBytes +-releasedBytes +-oldestDoneTimeStamp +-newlyReleasedChunks: ProfileBufferChunkMetadata[ ] + + + + UMLClass + + 310 + 780 + 440 + 70 + + *ProfileBufferControlledChunkManager* +-- +*/+SetUpdateCallback(function<void(update: Update&&)>)/* +*/+DestroyChunksAtOrBefore(timeStamp)/* + + + + Relation + + 480 + 840 + 30 + 80 + + lt=<<- + 10.0;10.0;10.0;60.0 + + diff --git a/tools/profiler/docs/index.rst b/tools/profiler/docs/index.rst new file mode 100644 index 0000000000..53920e7d2f --- /dev/null +++ b/tools/profiler/docs/index.rst @@ -0,0 +1,37 @@ +Gecko Profiler +============== + +The Firefox Profiler is the collection of tools used to profile Firefox. This is backed +by the Gecko Profiler, which is the primarily C++ component that instruments Gecko. It +is configurable, and supports a variety of data sources and recording modes. Primarily, +it is used as a statistical profiler, where the execution of threads that have been +registered with the profile is paused, and a sample is taken. Generally, this includes +a stackwalk with combined native stack frame, JavaScript stack frames, and custom stack +frame labels. + +In addition to the sampling, the profiler can collect markers, which are collected +deterministically (as opposed to statistically, like samples). These include some +kind of text description, and optionally a payload with more information. + +This documentation serves to document the Gecko Profiler and Base Profiler components, +while the profiler.firefox.com interface is documented at `profiler.firefox.com/docs/ `_ + +.. 
toctree:: + :maxdepth: 1 + + code-overview + buffer + instrumenting-javascript + instrumenting-rust + markers-guide + memory + +The following areas still need documentation: + + * LUL + * Instrumenting Java + * Registering Threads + * Samples and Stack Walking + * Triggering Gecko Profiles in Automation + * JS Tracer + * Serialization diff --git a/tools/profiler/docs/instrumenting-javascript.rst b/tools/profiler/docs/instrumenting-javascript.rst new file mode 100644 index 0000000000..928d94781e --- /dev/null +++ b/tools/profiler/docs/instrumenting-javascript.rst @@ -0,0 +1,60 @@ +Instrumenting JavaScript +======================== + +There are multiple ways to use the profiler with JavaScript. There is the "JavaScript" +profiler feature (via about:profiling), which enables stack walking for JavaScript code. +This is most likely turned on already for every profiler preset. + +In addition, markers can be created to specifically mark an instant in time, or a +duration. This can be helpful to make sense of a particular piece of the front-end, +or record events that normally wouldn't show up in samples. + +.. note:: + This guide explains JavaScript markers in depth. To learn more about how to add a + marker in C++ or Rust, please take a look at their documentation + in :doc:`markers-guide` or :doc:`instrumenting-rust` respectively. + +Markers in Browser Chrome +************************* + +If you have access to ChromeUtils, then adding a marker is relatively easy. + +.. code-block:: javascript + + // Add an instant marker, representing a single point in time + ChromeUtils.addProfilerMarker("MarkerName"); + + // Add a duration marker, representing a span of time.
+ const startTime = Cu.now(); + doWork(); + ChromeUtils.addProfilerMarker("MarkerName", startTime); + + // Add a duration marker, representing a span of time, with some additional text + const startTime = Cu.now(); + doWork(); + ChromeUtils.addProfilerMarker("MarkerName", startTime, "Details about this event"); + + // Add an instant marker, with some additional text + const startTime = Cu.now(); + doWork(); + ChromeUtils.addProfilerMarker("MarkerName", undefined, "Details about this event"); + +Markers in Content Code +*********************** + +If instrumenting content code, then the `UserTiming`_ API is the best bet. +:code:`performance.mark` will create an instant marker, and a :code:`performance.measure` +will create a duration marker. These markers will show up under UserTiming in +the profiler UI. + +.. code-block:: javascript + + // Create an instant marker. + performance.mark("InstantMarkerName"); + + doWork(); + + // Measuring with the performance API will also create duration markers. + performance.measure("DurationMarkerName", "InstantMarkerName"); + +.. _UserTiming: https://developer.mozilla.org/en-US/docs/Web/API/User_Timing_API diff --git a/tools/profiler/docs/instrumenting-rust.rst b/tools/profiler/docs/instrumenting-rust.rst new file mode 100644 index 0000000000..0c5021eec1 --- /dev/null +++ b/tools/profiler/docs/instrumenting-rust.rst @@ -0,0 +1,433 @@ +Instrumenting Rust +================== + +There are multiple ways to use the profiler with Rust. Native stack sampling already +includes the Rust frames without special handling. There is the "Native Stacks" +profiler feature (via about:profiling), which enables stack walking for native code. +This is most likely turned on already for every profiler preset. + +In addition to that, there is a profiler Rust API to instrument the Rust code +and add more information to the profile data. There are three main functionalities +to use: + +1.
Register Rust threads with the profiler, so the profiler can record these threads. +2. Add stack frame labels to annotate and categorize a part of the stack. +3. Add markers to specifically mark instants in time, or durations. This can be + helpful to make sense of a particular piece of the code, or record events that + normally wouldn't show up in samples. + +Crate to Include as a Dependency +-------------------------------- + +Profiler Rust API is located inside the ``gecko-profiler`` crate. This needs to +be included in the project dependencies before the following functionalities can +be used. + +To be able to include it, a new dependency entry needs to be added to the project's +``Cargo.toml`` file like this: + +.. code-block:: toml + + [dependencies] + gecko-profiler = { path = "../../tools/profiler/rust-api" } + +Note that the relative path needs to be updated depending on the project's location +in mozilla-central. + +Registering Threads +------------------- + +To be able to see the threads in the profile data, they need to be registered +with the profiler. Also, they need to be unregistered when they are exiting. +It's important to give a unique name to the thread, so they can be filtered easily. + +Registering and unregistering a thread is straightforward: + +.. code-block:: rust + + // Register it with a given name. + gecko_profiler::register_thread("Thread Name"); + // After doing some work, and right before exiting the thread, unregister it. + gecko_profiler::unregister_thread(); + +For example, here's how to register and unregister a simple thread: + +.. code-block:: rust + + let thread_name = "New Thread"; + std::thread::Builder::new() + .name(thread_name.into()) + .spawn(move || { + gecko_profiler::register_thread(thread_name); + // DO SOME WORK + gecko_profiler::unregister_thread(); + }) + .unwrap(); + +Or with a thread pool: + +.. 
code-block:: rust + + let worker = rayon::ThreadPoolBuilder::new() + .thread_name(move |idx| format!("Worker#{}", idx)) + .start_handler(move |idx| { + gecko_profiler::register_thread(&format!("Worker#{}", idx)); + }) + .exit_handler(|_idx| { + gecko_profiler::unregister_thread(); + }) + .build(); + +.. note:: + Registering a thread only will not make it appear in the profile data. In + addition, it needs to be added to the "Threads" filter in about:profiling. + This filter input is a comma-separated list. It matches partial names and + supports the wildcard ``*``. + +Adding Stack Frame Labels +------------------------- + +Stack frame labels are useful for annotating a part of the call stack with a +category. The category will appear in the various places on the Firefox Profiler +analysis page like timeline, call tree tab, flame graph tab, etc. + +``gecko_profiler_label!`` macro is used to add a new label frame. The added label +frame will exist between the call of this macro and the end of the current scope. + +Adding a stack frame label: + +.. code-block:: rust + + // Marking the stack as "Layout" category, no subcategory provided. + gecko_profiler_label!(Layout); + // Marking the stack as "JavaScript" category and "Parsing" subcategory. + gecko_profiler_label!(JavaScript, Parsing); + + // Or the entire function scope can be marked with a procedural macro. This is + // essentially a syntactical sugar and it expands into a function with a + // gecko_profiler_label! call at the very start: + #[gecko_profiler_fn_label(DOM)] + fn foo(bar: u32) -> u32 { + bar + } + +See the list of all profiling categories in the `profiling_categories.yaml`_ file. + +Adding Markers +-------------- + +Markers are packets of arbitrary data that are added to a profile by the Firefox code, +usually to indicate something important happening at a point in time, or during an interval of time. 
+ +Each marker has a name, a category, some common optional information (timing, backtrace, etc.), +and an optional payload of a specific type (containing arbitrary data relevant to that type). + +.. note:: + This guide explains Rust markers in depth. To learn more about how to add a + marker in C++ or JavaScript, please take a look at their documentation + in :doc:`markers-guide` or :doc:`instrumenting-javascript` respectively. + +Examples +^^^^^^^^ + +Short examples, details are below. + +.. code-block:: rust + + // Record a simple marker with the category of Graphics, DisplayListBuilding. + gecko_profiler::add_untyped_marker( + // Name of the marker as a string. + "Marker Name", + // Category with an optional sub-category. + gecko_profiler_category!(Graphics, DisplayListBuilding), + // MarkerOptions that keeps options like marker timing and marker stack. + // It will be a point in time by default. + Default::default(), + ); + +.. code-block:: rust + + // Create a marker with some additional text information. + let info = "info about this marker"; + gecko_profiler::add_text_marker( + // Name of the marker as a string. + "Marker Name", + // Category with an optional sub-category. + gecko_profiler_category!(DOM), + // MarkerOptions that keeps options like marker timing and marker stack. + MarkerOptions { + timing: MarkerTiming::instant_now(), + ..Default::default() + }, + // Additional information as a string. + info, + ); + +.. code-block:: rust + + // Record a custom marker of type `ExampleNumberMarker` (see definition below). + gecko_profiler::add_marker( + // Name of the marker as a string. + "Marker Name", + // Category with an optional sub-category. + gecko_profiler_category!(Graphics, DisplayListBuilding), + // MarkerOptions that keeps options like marker timing and marker stack. + Default::default(), + // Marker payload. + ExampleNumberMarker { number: 5 }, + ); + + .... + + // Marker type definition. It needs to derive Serialize, Deserialize.
+ #[derive(Serialize, Deserialize, Debug)] + pub struct ExampleNumberMarker { + number: i32, + } + + // Marker payload needs to implement the ProfilerMarker trait. + impl gecko_profiler::ProfilerMarker for ExampleNumberMarker { + // Unique marker type name. + fn marker_type_name() -> &'static str { + "example number" + } + // Data specific to this marker type, serialized to JSON for profiler.firefox.com. + fn stream_json_marker_data(&self, json_writer: &mut gecko_profiler::JSONWriter) { + json_writer.int_property("number", self.number.into()); + } + // Where and how to display the marker and its data. + fn marker_type_display() -> gecko_profiler::MarkerSchema { + use gecko_profiler::marker::schema::*; + let mut schema = MarkerSchema::new(&[Location::MarkerChart]); + schema.set_chart_label("Name: {marker.name}"); + schema.add_key_label_format("number", "Number", Format::Integer); + schema + } + } + +Untyped Markers +^^^^^^^^^^^^^^^ + +Untyped markers don't carry any information apart from common marker data: +Name, category, options. + +.. code-block:: rust + + gecko_profiler::add_untyped_marker( + // Name of the marker as a string. + "Marker Name", + // Category with an optional sub-category. + gecko_profiler_category!(Graphics, DisplayListBuilding), + // MarkerOptions that keeps options like marker timing and marker stack. + MarkerOptions { + timing: MarkerTiming::instant_now(), + ..Default::default() + }, + ); + +1. Marker name + The first argument is the name of this marker. This will be displayed in most places + the marker is shown. It can be a literal string, or any dynamic string. +2. `Profiling category pair`_ + A category + subcategory pair from `the list of categories`_. + The ``gecko_profiler_category!`` macro should be used to create a profiling category + pair since it's easier to use, e.g. ``gecko_profiler_category!(JavaScript, Parsing)``. + The second parameter can be omitted to use the default subcategory directly.
+ Using the ``gecko_profiler_category!`` macro is encouraged, but the ``ProfilingCategoryPair`` + enum can also be used if needed. +3. `MarkerOptions`_ + See the options below. If there are no options to specify, pass ``Default::default()``. + Some options can also be omitted, ``MarkerOptions {<options>, ..Default::default()}``, + with one or more of the following option types: + + * `MarkerTiming`_ + This specifies an instant or interval of time. It defaults to the current instant if + left unspecified. Otherwise use ``MarkerTiming::instant_at(ProfilerTime)`` or + ``MarkerTiming::interval(pt1, pt2)``; timestamps are usually captured with + ``ProfilerTime::Now()``. It is also possible to record only the start or the end of an + interval, pairs of start/end markers will be matched by their name. + * `MarkerStack`_ + By default, markers do not record a "stack" (or "backtrace"). To record a stack at + this point, in the most efficient manner, specify ``MarkerStack::Full``. To + capture a stack without native frames for reduced overhead, specify + ``MarkerStack::NonNative``. + + *Note: Currently, not all C++ marker options are present on the Rust side. They will + be added in the future.* + +Text Markers +^^^^^^^^^^^^ + +Text markers are very common, they carry an extra text as a fourth argument, in addition to +the marker name. Use the following function: + +.. code-block:: rust + + let info = "info about this marker"; + gecko_profiler::add_text_marker( + // Name of the marker as a string. + "Marker Name", + // Category with an optional sub-category. + gecko_profiler_category!(DOM), + // MarkerOptions that keeps options like marker timing and marker stack. + MarkerOptions { + stack: MarkerStack::Full, + ..Default::default() + }, + // Additional information as a string. + info, + ); + +As useful as it is, using an expensive ``format!`` operation to generate a complex text +comes with a variety of issues.
It can leak potentially sensitive information +such as URLs during the profile sharing step. profiler.firefox.com cannot +access the information programmatically. It won't get the formatting benefits of the +built-in marker schema. Please consider using a custom marker type to separate and +better present the data. + +Other Typed Markers +^^^^^^^^^^^^^^^^^^^ + +From Rust code, a marker of some type ``YourMarker`` (details about type definition follow) can be +recorded like this: + +.. code-block:: rust + + gecko_profiler::add_marker( + // Name of the marker as a string. + "Marker Name", + // Category with an optional sub-category. + gecko_profiler_category!(JavaScript), + // MarkerOptions that keeps options like marker timing and marker stack. + Default::default(), + // Marker payload. + YourMarker { number: 5, text: "some string".to_string() }, + ); + +After the first three common arguments (like in ``gecko_profiler::add_untyped_marker``), +there is a marker payload struct and it needs to be defined. Let's take a look at +how to define it. + +How to Define New Marker Types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each marker type must be defined once and only once. +The definition is a Rust ``struct``, it's constructed when recording markers of +that type in Rust. Each marker struct holds the data that is required for them +to show in the profiler.firefox.com. +By convention, the suffix "Marker" is recommended to better distinguish them +from non-profiler entities in the source. + +Each marker payload must derive ``serde::Serialize`` and ``serde::Deserialize``. +They are also exported from ``gecko-profiler`` crate if a project doesn't have it. +Each marker payload should include its data as its fields like this: + +.. code-block:: rust + + #[derive(Serialize, Deserialize, Debug)] + pub struct YourMarker { + number: i32, + text: String, + } + +Each marker struct must also implement the `ProfilerMarker`_ trait. 
+ +``ProfilerMarker`` trait +************************ + +`ProfilerMarker`_ trait must be implemented for all marker types. Its methods are +similar to C++ counterparts, please refer to :ref:`the C++ markers guide to learn +more about them <how-to-define-new-marker-types>`. It includes three methods that +need to be implemented: + +1. ``marker_type_name() -> &'static str``: + A marker type must have a unique name, it is used to keep track of the type of + markers in the profiler storage, and to identify them uniquely on profiler.firefox.com. + (It does not need to be the same as the struct's name.) + + E.g.: + + .. code-block:: rust + + fn marker_type_name() -> &'static str { + "your marker type" + } + +2. ``stream_json_marker_data(&self, json_writer: &mut JSONWriter)`` + All markers of any type have some common data: A name, a category, options like + timing, etc. as previously explained. + + In addition, a certain marker type may carry zero or more arbitrary pieces of + information, and they are always the same for all markers of that type. + + These are defined in a special member function ``stream_json_marker_data``. + + It's a member method and takes a ``&mut JSONWriter`` as a parameter, + it will be used to stream the data as JSON, to later be read by + profiler.firefox.com. See `JSONWriter object and its methods`_. + + E.g.: + + .. code-block:: rust + + fn stream_json_marker_data(&self, json_writer: &mut JSONWriter) { + json_writer.int_property("number", self.number.into()); + json_writer.string_property("text", &self.text); + } + +3. ``marker_type_display() -> schema::MarkerSchema`` + Now that how to stream type-specific data (from Firefox to + profiler.firefox.com) is defined, it needs to be described where and how this + data will be displayed on profiler.firefox.com. + + The static member function ``marker_type_display`` returns an opaque ``MarkerSchema`` + object, which will be forwarded to profiler.firefox.com.
+ + See the `MarkerSchema::Location enumeration for the full list`_. Also see the + `MarkerSchema struct for its possible methods`_. + + E.g.: + + .. code-block:: rust + + fn marker_type_display() -> schema::MarkerSchema { + // Import MarkerSchema related types for easier use. + use crate::marker::schema::*; + // Create a MarkerSchema struct with a list of locations provided. + // One or more constructor arguments determine where this marker will be displayed in + // the profiler.firefox.com UI. + let mut schema = MarkerSchema::new(&[Location::MarkerChart]); + + // Some labels can optionally be specified, to display certain information in different + // locations: set_chart_label, set_tooltip_label, and set_table_label; or + // set_all_labels to define all of them the same way. + schema.set_all_labels("{marker.name} - {marker.data.number}"); + + // Next, define the main display of marker data, which will appear in the Marker Chart + // tooltips and the Marker Table sidebar. + schema.add_key_label_format("number", "Number", Format::Number); + schema.add_key_label_format("text", "Text", Format::String); + schema.add_static_label_value("Help", "This is my own marker type"); + + // Lastly, return the created schema. + schema + } + + Note that the strings in ``set_all_labels`` may refer to marker data within braces: + + * ``{marker.name}``: Marker name. + * ``{marker.data.X}``: Type-specific data, as streamed with property name "X" + from ``stream_json_marker_data``. + + :ref:`See the C++ markers guide for more details about it <marker-type-display-schema>`. + +.. _profiling_categories.yaml: https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/build/profiling_categories.yaml +.. _Profiling category pair: https://searchfox.org/mozilla-central/define?q=gecko_profiler::gecko_bindings::profiling_categories::ProfilingCategoryPair +.. _the list of categories: https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/build/profiling_categories.yaml +..
_MarkerOptions: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::options::MarkerOptions +.. _MarkerTiming: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::options::MarkerTiming +.. _MarkerStack: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::options::MarkerStack +.. _ProfilerMarker: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::ProfilerMarker +.. _MarkerSchema::Location enumeration for the full list: https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerSchema%3A%3ALocation +.. _JSONWriter object and its methods: https://searchfox.org/mozilla-central/define?q=gecko_profiler::json_writer::JSONWriter +.. _MarkerSchema struct for its possible methods: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::schema::MarkerSchema diff --git a/tools/profiler/docs/markers-guide.rst b/tools/profiler/docs/markers-guide.rst new file mode 100644 index 0000000000..82fe6f3cda --- /dev/null +++ b/tools/profiler/docs/markers-guide.rst @@ -0,0 +1,485 @@ +Markers +======= + +Markers are packets of arbitrary data that are added to a profile by the Firefox code, usually to +indicate something important happening at a point in time, or during an interval of time. + +Each marker has a name, a category, some common optional information (timing, backtrace, etc.), +and an optional payload of a specific type (containing arbitrary data relevant to that type). + +.. note:: + This guide explains C++ markers in depth. To learn more about how to add a + marker in JavaScript or Rust, please take a look at their documentation + in :doc:`instrumenting-javascript` or :doc:`instrumenting-rust` respectively. + +Example +------- + +Short example, details below. + +Note: Most marker-related identifiers are in the ``mozilla`` namespace, to be added where necessary. + +.. code-block:: c++ + + // Record a simple marker with the category of DOM. 
+ PROFILER_MARKER_UNTYPED("Marker Name", DOM); + + // Create a marker with some additional text information. (Be wary of printf!) + PROFILER_MARKER_TEXT("Marker Name", JS, MarkerOptions{}, "Additional text information."); + + // Record a custom marker of type `ExampleNumberMarker` (see definition below). + PROFILER_MARKER("Number", OTHER, MarkerOptions{}, ExampleNumberMarker, 42); + +.. code-block:: c++ + + // Marker type definition. + struct ExampleNumberMarker { + // Unique marker type name. + static constexpr Span<const char> MarkerTypeName() { return MakeStringSpan("number"); } + // Data specific to this marker type, serialized to JSON for profiler.firefox.com. + static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter, int aNumber) { + aWriter.IntProperty("number", aNumber); + } + // Where and how to display the marker and its data. + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema(MS::Location::MarkerChart, MS::Location::MarkerTable); + schema.SetChartLabel("Number: {marker.data.number}"); + schema.AddKeyLabelFormat("number", "Number", MS::Format::Number); + return schema; + } + }; + + +How to Record Markers +--------------------- + +Header to Include +^^^^^^^^^^^^^^^^^ + +If the compilation unit only defines and records untyped, text, and/or its own markers, include +`the main profiler markers header `_: + +.. code-block:: c++ + + #include "mozilla/ProfilerMarkers.h" + +If it also records one of the other common markers defined in +`ProfilerMarkerTypes.h `_, +include that one instead: + +.. code-block:: c++ + + #include "mozilla/ProfilerMarkerTypes.h" + +And if it uses any other profiler functions (e.g., labels), use +`the main Gecko Profiler header `_ +instead: + +.. code-block:: c++ + + #include "GeckoProfiler.h" + +The above works from source files that end up in libxul, which is true for the majority +of Firefox source code.
But some files live outside of libxul, such as mfbt, in which +case the advice is the same but the equivalent headers are from the Base Profiler instead: + +.. code-block:: c++ + + #include "mozilla/BaseProfilerMarkers.h" // Only own/untyped/text markers + #include "mozilla/BaseProfilerMarkerTypes.h" // Only common markers + #include "BaseProfiler.h" // Markers and other profiler functions + +Untyped Markers +^^^^^^^^^^^^^^^ + +Untyped markers don't carry any information apart from common marker data: +Name, category, options. + +.. code-block:: c++ + + PROFILER_MARKER_UNTYPED( + // Name, and category pair. + "Marker Name", OTHER, + // Marker options, may be omitted if all defaults are acceptable. + MarkerOptions(MarkerStack::Capture(), ...)); + +``PROFILER_MARKER_UNTYPED`` is a macro that simplifies the use of the main +``profiler_add_marker`` function, by adding the appropriate namespaces, and a surrounding +``#ifdef MOZ_GECKO_PROFILER`` guard. + +1. Marker name + The first argument is the name of this marker. This will be displayed in most places + the marker is shown. It can be a literal C string, or any dynamic string object. +2. `Category pair name `_ + Choose a category + subcategory from `the list of categories <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/build/profiling_categories.yaml>`_. + This is the second parameter of each ``SUBCATEGORY`` line, for instance ``LAYOUT_Reflow``. + (Internally, this is really a `MarkerCategory `_ + object, in case you need to construct it elsewhere.) +3. `MarkerOptions `_ + See the options below. It can be omitted if there are no other arguments, ``{}``, or + ``MarkerOptions()`` (no specified options); only one of the following option types + alone; or ``MarkerOptions(...)`` with one or more of the following options types: + + * `MarkerThreadId `_ + Rarely used, as it defaults to the current thread.
Otherwise it specifies the target + "thread id" (aka "track") where the marker should appear. This may be useful when + referring to something that happened on another thread (use ``profiler_current_thread_id()`` + from the original thread to get its id); or for some important markers, they may be + sent to the "main thread", which can be specified with ``MarkerThreadId::MainThread()``. + * `MarkerTiming `_ + This specifies an instant or interval of time. It defaults to the current instant if + left unspecified. Otherwise use ``MarkerTiming::InstantAt(timestamp)`` or + ``MarkerTiming::Interval(ts1, ts2)``; timestamps are usually captured with + ``TimeStamp::Now()``. It is also possible to record only the start or the end of an + interval, pairs of start/end markers will be matched by their name. *Note: The + upcoming "marker sets" feature will make this pairing more reliable, and also + allow more than two markers to be connected*. + * `MarkerStack `_ + By default, markers do not record a "stack" (or "backtrace"). To record a stack at + this point, in the most efficient manner, specify ``MarkerStack::Capture()``. To + record a previously captured stack, first store a stack into a + ``UniquePtr<ProfileChunkedBuffer>`` with ``profiler_capture_backtrace()``, then pass + it to the marker with ``MarkerStack::TakeBacktrace(std::move(stack))``. + * `MarkerInnerWindowId `_ + If you have access to an "inner window id", consider specifying it as an option, to + help profiler.firefox.com to classify them by tab. + +Text Markers +^^^^^^^^^^^^ + +Text markers are very common, they carry an extra text as a fourth argument, in addition to +the marker name. Use the following macro: + +.. code-block:: c++ + + PROFILER_MARKER_TEXT( + // Name, category pair, options. + "Marker Name", OTHER, {}, + // Text string. + "Here are some more details." + ); + +As useful as it is, using an expensive ``printf`` operation to generate a complex text +comes with a variety of issues.
It can leak potentially sensitive information +such as URLs during the profile sharing step. profiler.firefox.com cannot +access the information programmatically. It won't get the formatting benefits of the +built-in marker schema. Please consider using a custom marker type to separate and +better present the data. + +Other Typed Markers +^^^^^^^^^^^^^^^^^^^ + +From C++ code, a marker of some type ``YourMarker`` (details about type definition follow) can be +recorded like this: + +.. code-block:: c++ + + PROFILER_MARKER( + "YourMarker name", OTHER, + MarkerOptions(MarkerTiming::IntervalUntilNowFrom(someStartTimestamp), + MarkerInnerWindowId(innerWindowId)), + YourMarker, "some string", 12345, "http://example.com", someTimeStamp); + +After the first three common arguments (like in ``PROFILER_MARKER_UNTYPED``), there are: + +4. The marker type, which is the name of the C++ ``struct`` that defines that type. +5. A variadic list of type-specific arguments. They must match the number of, and must + be convertible to, ``StreamJSONMarkerData`` parameters as specified in the marker type definition. + +"Auto" Scoped Interval Markers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To capture time intervals around some important operations, it is common to store a timestamp, do the work, +and then record a marker, e.g.: + +.. code-block:: c++ + + void DoTimedWork() { + TimeStamp start = TimeStamp::Now(); + DoWork(); + PROFILER_MARKER_TEXT("Timed work", OTHER, MarkerTiming::IntervalUntilNowFrom(start), "Details"); + } + +`RAII `_ objects automate this, by recording the time +when the object is constructed, and later recording the marker when the object is destroyed at the end +of its C++ scope. +This is especially useful if there are multiple scope exit points. + +``AUTO_PROFILER_MARKER_TEXT`` is `the only one implemented `_ at this time. + +..
code-block:: c++ + + void MaybeDoTimedWork(bool aDoIt) { + AUTO_PROFILER_MARKER_TEXT("Timed work", OTHER, "Details"); + if (!aDoIt) { /* Marker recorded here... */ return; } + DoWork(); + /* ... or here. */ + } + +Note that these RAII objects only record one marker. In some situations, a very long +operation could be missed if it hasn't completed by the end of the profiling session. +In this case, consider recording two distinct markers, using +``MarkerTiming::IntervalStart()`` and ``MarkerTiming::IntervalEnd()``. + +Where to Define New Marker Types +-------------------------------- + +The first step is to determine the location of the marker type definition: + +* If this type is only used in one function, or a component, it can be defined in a + local common place relative to its use. +* For a more common type that could be used from multiple locations: + + * If there is no dependency on XUL, it can be defined in the Base Profiler, which can + be used in most locations in the codebase: + `mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h>`__ + + * However, if there is a XUL dependency, then it needs to be defined in the Gecko Profiler: + `tools/profiler/public/ProfilerMarkerTypes.h <https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerMarkerTypes.h>`__ + +.. _how-to-define-new-marker-types: + +How to Define New Marker Types +------------------------------ + +Each marker type must be defined once and only once. +The definition is a C++ ``struct``, its identifier is used when recording +markers of that type in C++. +By convention, the suffix "Marker" is recommended to better distinguish them +from non-profiler entities in the source. + +.. code-block:: c++ + + struct YourMarker { + +Marker Type Name +^^^^^^^^^^^^^^^^ + +A marker type must have a unique name, it is used to keep track of the type of +markers in the profiler storage, and to identify them uniquely on profiler.firefox.com. +(It does not need to be the same as the ``struct``'s name.) + +This name is defined in a special static member function ``MarkerTypeName``: + +..
code-block:: c++ + + // … + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("YourMarker"); + } + +Marker Type Data +^^^^^^^^^^^^^^^^ + +All markers of any type have some common data: A name, a category, options like +timing, etc. as previously explained. + +In addition, a certain marker type may carry zero or more arbitrary pieces of +information, and they are always the same for all markers of that type. + +These are defined in a special static member function ``StreamJSONMarkerData``. + +The first function parameter is always ``SpliceableJSONWriter& aWriter``, +it will be used to stream the data as JSON, to later be read by +profiler.firefox.com. + +.. code-block:: c++ + + // … + static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter, + +The following function parameters are how the data is received as C++ objects +from the call sites. + +* Most C/C++ `POD (Plain Old Data) `_ + and `trivially-copyable `_ + types should work as-is, including ``TimeStamp``. +* Character strings should be passed using ``const ProfilerString8View&`` (this handles + literal strings, and various ``std::string`` and ``nsCString`` types, and spans with or + without null terminator). Use ``const ProfilerString16View&`` for 16-bit strings such as + ``nsString``. +* Other types can be used if they define specializations for ``ProfileBufferEntryWriter::Serializer`` + and ``ProfileBufferEntryReader::Deserializer``. You should rarely need to define new + ones, but if needed see how existing specializations are written, or contact the + `perf-tools team for help `_. + +Passing by value or by reference-to-const is recommended, because arguments are serialized +in binary form (i.e., there are no optimizable ``move`` operations). + +For example, here's how to handle a string, a 64-bit number, another string, and +a timestamp: + +..
code-block:: c++ + + // … + const ProfilerString8View& aString, + const int64_t aBytes, + const ProfilerString8View& aURL, + const TimeStamp& aTime) { + +Then the body of the function turns these parameters into a JSON stream. + +When this function is called, the writer has just started a JSON object, so +everything that is written should be a named object property. Use +``SpliceableJSONWriter`` functions, in most cases ``...Property`` functions +from its parent class ``JSONWriter``: ``NullProperty``, ``BoolProperty``, +``IntProperty``, ``DoubleProperty``, ``StringProperty``. (Other nested JSON +types like arrays or objects are not supported by the profiler.) + +As a special case, ``TimeStamp`` values must be streamed using ``aWriter.TimeProperty(name, timestamp)``. + +The property names will be used to identify where each piece of data is stored and +how it should be displayed on profiler.firefox.com (see next section). + +Here's how the above function parameters could be streamed: + +.. code-block:: c++ + + // … + aWriter.StringProperty("myString", aString); + aWriter.IntProperty("myBytes", aBytes); + aWriter.StringProperty("myURL", aURL); + aWriter.TimeProperty("myTime", aTime); + } + +.. _marker-type-display-schema: + +Marker Type Display Schema +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Now that we have defined how to stream type-specific data (from Firefox to +profiler.firefox.com), we need to describe where and how this data will be +displayed on profiler.firefox.com. + +The static member function ``MarkerTypeDisplay`` returns an opaque ``MarkerSchema`` +object, which will be forwarded to profiler.firefox.com. + +.. code-block:: c++ + + // … + static MarkerSchema MarkerTypeDisplay() { + +The ``MarkerSchema`` type will be used repeatedly, so for convenience we can define +a local type alias: + +.. code-block:: c++ + + // … + using MS = MarkerSchema; + +First, we construct the ``MarkerSchema`` object to be returned at the end.
+ +One or more constructor arguments determine where this marker will be displayed in +the profiler.firefox.com UI. See the `MarkerSchema::Location enumeration for the +full list <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerSchema%3A%3ALocation>`_. + +Here is the most common set of locations, showing markers of that type in both the +Marker Chart and the Marker Table panels: + +.. code-block:: c++ + + // … + MS schema(MS::Location::MarkerChart, MS::Location::MarkerTable); + +Some labels can optionally be specified, to display certain information in different +locations: ``SetChartLabel``, ``SetTooltipLabel``, and ``SetTableLabel``; or +``SetAllLabels`` to define all of them the same way. + +The argument is a string that may refer to marker data within braces: + +* ``{marker.name}``: Marker name. +* ``{marker.data.X}``: Type-specific data, as streamed with property name "X" from ``StreamJSONMarkerData`` (e.g., ``aWriter.IntProperty("X", aNumber);``) + +For example, here's how to set the Marker Chart label to show the marker name and the +``myBytes`` number of bytes: + +.. code-block:: c++ + + // … + schema.SetChartLabel("{marker.name} – {marker.data.myBytes}"); + +profiler.firefox.com will apply the label with the data in a consistent manner. For +example, with this label definition, it could display marker information like the +following in the Firefox Profiler's Marker Chart: + + * "Marker Name – 10B" + * "Marker Name – 25.204KB" + * "Marker Name – 512.54MB" + +For implementation details on this processing, see `src/profiler-logic/marker-schema.js `_ +in the profiler's front-end. + +Next, define the main display of marker data, which will appear in the Marker +Chart tooltips and the Marker Table sidebar. + +Each row may either be: + +* A dynamic key-value pair, using one of the ``MarkerSchema::AddKey...`` functions. Each function is given: + + * Key: Element property name as streamed in ``StreamJSONMarkerData``. + * Label: Optional prefix. Defaults to the key name.
+ * Format: How to format the data element value, see `MarkerSchema::Format for details `_. + * Searchable: Optional boolean, indicates if the value is used in searches, defaults to false. + +* Or a fixed label and value strings, using ``MarkerSchema::AddStaticLabelValue``. + +.. code-block:: c++ + + // … + schema.AddKeyLabelFormatSearchable( + "myString", "My String", MS::Format::String, true); + schema.AddKeyLabelFormat( + "myBytes", "My Bytes", MS::Format::Bytes); + schema.AddKeyLabelFormat( + "myURL", "My URL", MS::Format::Url); + schema.AddKeyLabelFormat( + "myTime", "Event time", MS::Format::Time); + +Finally, the ``schema`` object is returned from the function: + +.. code-block:: c++ + + // … + return schema; + } + +Any other ``struct`` member function is ignored. There could be utility functions used by the above +compulsory functions, to make the code clearer. + +And that is the end of the marker definition ``struct``. + +.. code-block:: c++ + + // … + }; + +Performance Considerations +-------------------------- + +During profiling, it is best to reduce the amount of work spent doing profiler +operations, as they can influence the performance of the code that you want to profile. + +Whenever possible, consider passing simple types to marker functions, such that +``StreamJSONMarkerData`` will do the minimum amount of work necessary to serialize +the marker type-specific arguments to its internal buffer representation. POD types +(numbers) and strings are the easiest and cheapest to serialize. Look at the +corresponding ``ProfileBufferEntryWriter::Serializer`` specializations if you +want to better understand the work done. + +Avoid doing expensive operations when recording markers. E.g.: ``printf`` of +different things into a string, or complex computations; instead pass the +``printf``/computation arguments straight through to the marker function, so that +``StreamJSONMarkerData`` can do the expensive work at the end of the profiling session.
+ +Marker Architecture Description +------------------------------- + +The above sections should give all the information needed for adding your own marker +types. However, if you want to work on the marker architecture itself, this +section will describe how the system works. + +TODO: + * Briefly describe the buffer and serialization. + * Describe the template strategy for generating marker types + * Describe the serialization and link to profiler front-end docs on marker processing (if they exist) diff --git a/tools/profiler/docs/memory.rst b/tools/profiler/docs/memory.rst new file mode 100644 index 0000000000..347a91f9e7 --- /dev/null +++ b/tools/profiler/docs/memory.rst @@ -0,0 +1,46 @@ +Profiling Memory +================ + +Sampling stacks from native allocations +--------------------------------------- + +The profiler can sample allocations and de-allocations from malloc using the +"Native Allocations" feature. This can be enabled by going to `about:profiling` and +enabling the "Native Allocations" checkbox. It is only available in Nightly, as it +uses a technique of hooking into malloc that could be a little more risky to apply to +the broader population of Firefox users. + +This implementation is located in: `tools/profiler/core/memory_hooks.cpp +`_ + +It works by hooking into all of the malloc calls. When the profiler is running, it +performs a `Bernoulli trial`_ that passes with a given probability per byte +allocated. What this means is that larger allocations have a higher chance of being +recorded compared to smaller allocations. Currently, there is no way to configure +the per-byte probability. This means that sampled allocation sizes will be closer +to the actual allocated bytes. + +This infrastructure is quite similar to DMD, but with the additional motivation of +making it easy to turn on and use with the profiler. The overhead is quite high, +especially on systems with more expensive stack walking, like Linux.
Turning off +the "Native Stacks" feature can help lower overhead, but will give less information. + +For more information on analyzing these profiles, see the `Firefox Profiler docs`_. + +Memory counters +--------------- + +Similar to the Native Allocations feature, memory counters use the malloc memory hook +that is only available in Nightly. When it's available, the memory counters are always +turned on. This is a lightweight way to count in a very granular fashion how much +memory is being allocated and deallocated during the profiling session. + +This information is then visualized in the `Firefox Profiler memory track`_. + +This feature uses the `Profiler Counters`_, which can be used to create other types +of cheap counting instrumentation. + +.. _Bernoulli trial: https://en.wikipedia.org/wiki/Bernoulli_trial +.. _Firefox Profiler docs: https://profiler.firefox.com/docs/#/./memory-allocations +.. _Firefox Profiler memory track: https://profiler.firefox.com/docs/#/./memory-allocations?id=memory-track +.. 
_Profiler Counters: https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerCounts.h diff --git a/tools/profiler/docs/profilerclasses-20220913.png b/tools/profiler/docs/profilerclasses-20220913.png new file mode 100644 index 0000000000..a5ba265407 Binary files /dev/null and b/tools/profiler/docs/profilerclasses-20220913.png differ diff --git a/tools/profiler/docs/profilerclasses.umlet.uxf b/tools/profiler/docs/profilerclasses.umlet.uxf new file mode 100644 index 0000000000..c807853401 --- /dev/null +++ b/tools/profiler/docs/profilerclasses.umlet.uxf @@ -0,0 +1,811 @@ + + + 10 + + UMLClass + + 80 + 370 + 340 + 190 + + ThreadInfo +-- +-mName: nsCString +-mRegisterTime: TimeStamp +-mThreadId: int +-mIsMainThread: bool +-- +NS_INLINE_DECL_THREADSAFE_REFCOUNTING ++Name() ++RegisterTime() ++ThreadId() ++IsMainThread() + + + + + UMLClass + + 470 + 300 + 600 + 260 + + RacyRegisteredThread +-- +-mProfilingStackOwner: NotNull<RefPtr<ProfilingStackOwner>> +-mThreadId +-mSleep: Atomic<int> /* AWAKE, SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED */ +-mIsBeingProfiled: Atomic<bool, Relaxed> +-- ++SetIsBeingProfiled() ++IsBeingProfiled() ++ReinitializeOnResume() ++CanDuplicateLastSampleDueToSleep() ++SetSleeping() ++SetAwake() ++IsSleeping() ++ThreadId() ++ProfilingStack() ++ProfilingStackOwner() + + + + UMLClass + + 470 + 650 + 350 + 360 + + RegisteredThread +-- +-mPlatformData: UniquePlatformData +-mStackTop: const void* +-mThread: nsCOMPtr<nsIThread> +-mContext: JSContext* +-mJSSampling: enum {INACTIVE, ACTIVE_REQUESTED, ACTIVE, INACTIVE_REQUESTED} +-mmJSFlags: uint32_t +-- ++RacyRegisteredThread() ++GetPlatformData() ++StackTop() ++GetRunningEventDelay() ++SizeOfIncludingThis() ++SetJSContext() ++ClearJSContext() ++GetJSContext() ++Info(): RefPtr<ThreadInfo> ++GetEventTarget(): nsCOMPtr<nsIEventTarget> ++ResetMainThread(nsIThread*) ++StartJSSampling() ++StopJSSampling() ++PollJSSampling() + + + + + Relation + + 750 + 550 + 180 + 120 + + lt=<<<<<- 
+mRacyRegisteredThread + 10.0;100.0;10.0;10.0 + + + Relation + + 290 + 550 + 230 + 120 + + lt=<<<<- +mThreadInfo: RefPtr<> + 210.0;100.0;10.0;10.0 + + + UMLClass + + 70 + 660 + 340 + 190 + + PageInformation +-- +-mBrowsingContextID: uint64_t +-mInnerWindowID: uint64_t +-mUrl: nsCString +-mEmbedderInnerWindowID: uint64_t +-- +NS_INLINE_DECL_THREADSAFE_REFCOUNTING ++SizeOfIncludingThis(MallocSizeOf) ++Equals(PageInformation*) ++StreamJSON(SpliceableJSONWriter&) ++InnerWindowID() ++BrowsingContextID() ++Url() ++EmbedderInnerWindowID() ++BufferPositionWhenUnregistered(): Maybe<uint64_t> ++NotifyUnregistered(aBufferPosition: uint64_t) + + + + UMLClass + + 760 + 1890 + 570 + 120 + + ProfilerBacktrace +-- +-mName: UniqueFreePtr<char> +-mThreadId: int +-mProfileChunkedBuffer: UniquePtr<ProfileChunkedBuffer> +-mProfileBuffer: UniquePtr<ProfileBuffer> +-- ++StreamJSON(SpliceableJSONWriter&, aProcessStartTime: TimeStamp, UniqueStacks&) + + + + + UMLClass + + 20 + 2140 + 620 + 580 + + ProfileChunkedBuffer +-- +-mMutex: BaseProfilerMaybeMutex +-mChunkManager: ProfileBufferChunkManager* +-mOwnedChunkManager: UniquePtr<ProfileBufferChunkManager> +-mCurrentChunk: UniquePtr<ProfileBufferChunk> +-mNextChunks: UniquePtr<ProfileBufferChunk> +-mRequestedChunkHolder: RefPtr<RequestedChunkRefCountedHolder> +-mNextChunkRangeStart: ProfileBufferIndex +-mRangeStart: Atomic<ProfileBufferIndex, ReleaseAcquire> +-mRangeEnd: ProfileBufferIndex +-mPushedBlockCount: uint64_t +-mClearedBlockCount: Atomic<uint64_t, ReleaseAcquire> +-- ++Byte = ProfileBufferChunk::Byte ++Length = ProfileBufferChunk::Length ++IsThreadSafe() ++IsInSession() ++ResetChunkManager() ++SetChunkManager() ++Clear() ++BufferLength(): Maybe<size_t> ++SizeOfExcludingThis(MallocSizeOf) ++SizeOfIncludingThis(MallocSizeOf) ++GetState() ++IsThreadSafeAndLockedOnCurrentThread(): bool ++LockAndRun(Callback&&) ++ReserveAndPut(CallbackEntryBytes&&, Callback<auto(Maybe<ProfileBufferEntryWriter>&)>&&) ++Put(aEntryBytes: Length, 
Callback<auto(Maybe<ProfileBufferEntryWriter>&)>&&) ++PutFrom(const void*, Length) ++PutObjects(const Ts&...) ++PutObject(const T&) ++GetAllChunks() ++Read(Callback<void(Reader&)>&&): bool ++ReadEach(Callback<void(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])>&&) ++ReadAt(ProfileBufferBlockIndex, Callback<void(Maybe<ProfileBufferEntryReader>&&)>&&) ++AppendContents + + + + UMLClass + + 810 + 2100 + 500 + 620 + + ProfileBufferChunk +-- ++Header: { + mOffsetFirstBlock; mOffsetPastLastBlock; mDoneTimeStamp; + mBufferBytes; mBlockCount; mRangeStart; mProcessId; + } +-InternalHeader: { mHeader: Header; mNext: UniquePtr<ProfileBufferChunk>; } +-- +-mInternalHeader: InternalHeader +-mBuffer: Byte /* First byte */ +-- ++Byte = uint8_t ++Length = uint32_t ++SpanOfBytes = Span<Byte> +/+Create(aMinBufferBytes: Length): UniquePtr<ProfileBufferChunk>/ ++ReserveInitialBlockAsTail(Length): SpanOfBytes ++ReserveBlock(Length): { SpanOfBytes, ProfileBufferBlockIndex } ++MarkDone() ++MarkRecycled() ++ChunkHeader() ++BufferBytes() ++ChunkBytes() ++SizeOfExcludingThis(MallocSizeOf) ++SizeOfIncludingThis(MallocSizeOf) ++RemainingBytes(): Length ++OffsetFirstBlock(): Length ++OffsetPastLastBlock(): Length ++BlockCount(): Length ++ProcessId(): int ++SetProcessId(int) ++RangeStart(): ProfileBufferIndex ++SetRangeStart(ProfileBufferIndex) ++BufferSpan(): Span<const Byte> ++ByteAt(aOffset: Length) ++GetNext(): maybe-const ProfileBufferChunk* ++ReleaseNext(): UniquePtr<ProfileBufferChunk> ++InsertNext(UniquePtr<ProfileBufferChunk>&&) ++Last(): const ProfileBufferChunk* ++SetLast(UniquePtr<ProfileBufferChunk>&&) +/+Join(UniquePtr<ProfileBufferChunk>&&, UniquePtr<ProfileBufferChunk>&&)/ + + + + + UMLClass + + 120 + 2850 + 570 + 350 + + ProfileBufferEntryReader +-- +-mCurrentSpan: SpanOfConstBytes +-mNextSpanOrEmpty: SpanOfConstBytes +-mCurrentBlockIndex: ProfileBufferBlockIndex +-mNextBlockIndex: ProfileBufferBlockIndex +-- ++RemainingBytes(): Length ++SetRemainingBytes(Length) 
++CurrentBlockIndex(): ProfileBufferBlockIndex ++NextBlockIndex(): ProfileBufferBlockIndex ++EmptyIteratorAtOffset(Length): ProfileBufferEntryReader ++operator*(): const Byte& ++operator++(): ProfileBufferEntryReader& ++operator+=(Length): ProfileBufferEntryReader& ++operator==(const ProfileBufferEntryReader&) ++operator!=(const ProfileBufferEntryReader&) ++ReadULEB128<T>(): T ++ReadBytes(void*, Length) ++ReadIntoObject(T&) ++ReadIntoObjects(Ts&...) ++ReadObject<T>(): T + + + + UMLClass + + 740 + 2850 + 570 + 300 + + ProfileBufferEntryWriter +-- +-mCurrentSpan: SpanOfBytes +-mNextSpanOrEmpty: SpanOfBytes +-mCurrentBlockIndex: ProfileBufferBlockIndex +-mNextBlockIndex: ProfileBufferBlockIndex +-- ++RemainingBytes(): Length ++CurrentBlockIndex(): ProfileBufferBlockIndex ++NextBlockIndex(): ProfileBufferBlockIndex ++operator*(): Byte& ++operator++(): ProfileBufferEntryReader& ++operator+=(Length): ProfileBufferEntryReader& +/+ULEB128Size(T): unsigned/ ++WriteULEB128(T) +/+SumBytes(const Ts&...): Length/ ++WriteFromReader(ProfileBufferEntryReader&, Length) ++WriteObject(const T&) ++WriteObjects(const T&) + + + + UMLClass + + 120 + 3270 + 570 + 80 + + ProfileBufferEntryReader::Deserializer<T> +/to be specialized for all types read from ProfileBufferEntryReader/ +-- +/+ReadInto(ProfileBufferEntryReader&, T&)/ +/+Read<T>(ProfileBufferEntryReader&): T/ + + + + UMLClass + + 740 + 3270 + 570 + 80 + + ProfileBufferEntryWriter::Serializer<T> +/to be specialized for all types written into ProfileBufferEntryWriter/ +-- +/+Bytes(const T&): Length/ +/+Write(ProfileBufferEntryWriter&, const T&)/ + + + + Relation + + 330 + 2710 + 110 + 160 + + lt=.> +<<creates>> + 10.0;10.0;60.0;140.0 + + + Relation + + 430 + 2710 + 360 + 160 + + lt=.> +<<creates>> + 10.0;10.0;340.0;140.0 + + + Relation + + 660 + 2710 + 260 + 160 + + lt=.> +<<points into>> + 10.0;140.0;240.0;10.0 + + + Relation + + 870 + 2710 + 140 + 160 + + lt=.> +<<points into>> + 10.0;140.0;80.0;10.0 + + + Relation + + 630 + 2170 
+ 200 + 40 + + lt=<<<<- +mCurrentChunk + 10.0;20.0;180.0;20.0 + + + Relation + + 630 + 2230 + 200 + 40 + + lt=<<<<- +mNextChunks + 10.0;20.0;180.0;20.0 + + + Relation + + 1100 + 2030 + 170 + 90 + + lt=<<<<- +mInternalHeader.mNext + 10.0;70.0;10.0;20.0;150.0;20.0;150.0;70.0 + + + Relation + + 490 + 3190 + 70 + 100 + + lt=.> +<<uses>> + 10.0;10.0;10.0;80.0 + + + Relation + + 580 + 3190 + 230 + 100 + + lt=.> +<<uses>> + 10.0;10.0;210.0;80.0 + + + UMLClass + + 50 + 1620 + 570 + 410 + + ProfileBuffer +-- +-mFirstSamplingTimeNs: double +-mLastSamplingTimeNs: double +-mIntervalNs, etc.: ProfilerStats +-- ++IsThreadSafe(): bool ++AddEntry(const ProfileBufferEntry&): uint64_t ++AddThreadIdEntry(int): uint64_t ++PutObjects(Kind, const Ts&...): ProfileBufferBlockIndex ++CollectCodeLocation(...) ++AddJITInfoForRange(...) ++StreamSamplesToJSON(SpliceableJSONWriter&, aThreadId: int, aSinceTime: double, UniqueStacks&) ++StreamMarkersToJSON(SpliceableJSONWriter&, ...) ++StreamPausedRangesToJSON(SpliceableJSONWriter&, aSinceTime: double) ++StreamProfilerOverheadToJSON(SpliceableJSONWriter&, ...) ++StreamCountersToJSON(SpliceableJSONWriter&, ...) ++DuplicateLsstSample ++DiscardSamplesBeforeTime(aTime: double) ++GetEntry(aPosition: uint64_t): ProfileBufferEntry ++SizeOfExcludingThis(MallocSizeOf) ++SizeOfIncludingThis(MallocSizeOf) ++CollectOverheadStats(...) 
++GetProfilerBufferInfo(): ProfilerBufferInfo ++BufferRangeStart(): uint64_t ++BufferRangeEnd(): uint64_t + + + + UMLClass + + 690 + 1620 + 230 + 60 + + ProfileBufferEntry +-- ++mKind: Kind ++mStorage: uint8_t[kNumChars=8] + + + + UMLClass + + 930 + 1620 + 440 + 130 + + UniqueJSONStrings +-- +-mStringTableWriter: SpliceableChunkedJSONWriter +-mStringHashToIndexMap: HashMap<HashNumber, uint32_t> +-- ++SpliceStringTableElements(SpliceableJSONWriter&) ++WriteProperty(JSONWriter&, aName: const char*, aStr: const char*) ++WriteElement(JSONWriter&, aStr: const char*) ++GetOrAddIndex(const char*): uint32_t + + + + UMLClass + + 680 + 1760 + 470 + 110 + + UniqueStack +-- +-mFrameTableWriter: SpliceableChunkedJSONWriter +-mFrameToIndexMap: HashMap<FrameKey, uint32_t, FrameKeyHasher> +-mStackTableWriter: SpliceableChunkedJSONWriter +-mStackToIndexMap: HashMap<StackKey, uint32_t, StackKeyHasher> +-mJITInfoRanges: Vector<JITFrameInfoForBufferRange> + + + + Relation + + 320 + 2020 + 230 + 140 + + lt=<<<<- +mEntries: ProfileChunkedBuffer& + 10.0;10.0;10.0;120.0 + + + Relation + + 610 + 1640 + 100 + 40 + + lt=.> +<<uses>> + 10.0;20.0;80.0;20.0 + + + Relation + + 610 + 1710 + 340 + 40 + + lt=.> +<<uses>> + 10.0;20.0;320.0;20.0 + + + Relation + + 610 + 1800 + 90 + 40 + + lt=.> +<<uses>> + 10.0;20.0;70.0;20.0 + + + Relation + + 610 + 1900 + 170 + 40 + + lt=<<<<- +mProfileBuffer + 150.0;20.0;10.0;20.0 + + + Relation + + 590 + 1940 + 250 + 220 + + lt=<<<<- +mProfileChunkedBuffer + 170.0;10.0;10.0;200.0 + + + UMLClass + + 20 + 1030 + 490 + 550 + + CorePS +-- +/-sInstance: CorePS*/ +-mMainThreadId: int +-mProcessStartTime: TimeStamp +-mCoreBuffer: ProfileChunkedBuffer +-mRegisteredThreads: Vector<UniquePtr<RegisteredThread>> +-mRegisteredPages: Vector<RefPtr<PageInformation>> +-mCounters: Vector<BaseProfilerCount*> +-mLul: UniquePtr<lul::LUL> /* linux only */ +-mProcessName: nsAutoCString +-mJsFrames: JsFrameBuffer +-- ++Create ++Destroy ++Exists(): bool ++AddSizeOf(...) 
++MainThreadId() ++ProcessStartTime() ++CoreBuffer() ++RegisteredThreads(PSLockRef) ++JsFrames(PSLockRef) +/+AppendRegisteredThread(PSLockRef, UniquePtr<RegisteredThread>)/ +/+RemoveRegisteredThread(PSLockRef, RegisteredThread*)/ ++RegisteredPages(PSLockRef) +/+AppendRegisteredPage(PSLockRef, RefPtr<PageInformation>)/ +/+RemoveRegisteredPage(PSLockRef, aRegisteredInnerWindowID: uint64_t)/ +/+ClearRegisteredPages(PSLockRef)/ ++Counters(PSLockRef) ++AppendCounter ++RemoveCounter ++Lul(PSLockRef) ++SetLul(PSLockRef, UniquePtr<lul::LUL>) ++ProcessName(PSLockRef) ++SetProcessName(PSLockRef, const nsACString&) + + + + + Relation + + 20 + 1570 + 110 + 590 + + lt=<<<<<- +mCoreBuffer + 10.0;10.0;10.0;570.0 + + + Relation + + 160 + 840 + 150 + 210 + + lt=<<<<- +mRegisteredPages + 10.0;190.0;10.0;10.0 + + + Relation + + 250 + 840 + 240 + 210 + + lt=<<<<- +mRegisteredThreads + 10.0;190.0;220.0;10.0 + + + UMLClass + + 920 + 860 + 340 + 190 + + SamplerThread +-- +-mSampler: Sampler +-mActivityGeneration: uint32_t +-mIntervalMicroseconds: int +-mThread /* OS-specific */ +-mPostSamplingCallbackList: UniquePtr<PostSamplingCallbackListItem> +-- ++Run() ++Stop(PSLockRef) ++AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&&) + + + + UMLClass + + 1060 + 600 + 340 + 190 + + Sampler +-- +-mOldSigprofHandler: sigaction +-mMyPid: int +-mSamplerTid: int ++sSigHandlerCoordinator +-- ++Disable(PSLockRef) ++SuspendAndSampleAndResumeThread(PSLockRef, const RegisteredThread&, aNow: TimeStamp, const Func&) + + + + + Relation + + 1190 + 780 + 90 + 100 + + lt=<<<<<- +mSampler + 10.0;80.0;10.0;10.0 + + + UMLClass + + 610 + 1130 + 470 + 400 + + ActivePS +-- +/-sInstance: ActivePS*/ +-mGeneration: const uint32_t +/-sNextGeneration: uint32_t/ +-mCapacity: const PowerOfTwo +-mDuration: const Maybe<double> +-mInterval: const double /* milliseconds */ +-mFeatures: const uint32_t +-mFilters: Vector<std::string> +-mActiveBrowsingContextID: uint64_t +-mProfileBufferChunkManager: 
ProfileBufferChunkManagerWithLocalLimit +-mProfileBuffer: ProfileBuffer +-mLiveProfiledThreads: Vector<LiveProfiledThreadData> +-mDeadProfiledThreads: Vector<UniquePtr<ProfiledThreadData>> +-mDeadProfiledPages: Vector<RefPtr<PageInformation>> +-mSamplerThread: SamplerThread* const +-mInterposeObserver: RefPtr<ProfilerIOInterposeObserver> +-mPaused: bool +-mWasPaused: bool /* linux */ +-mBaseProfileThreads: UniquePtr<char[]> +-mGeckoIndexWhenBaseProfileAdded: ProfileBufferBlockIndex +-mExitProfiles: Vector<ExitProfile> +-- ++ + + + + Relation + + 970 + 1040 + 140 + 110 + + lt=<<<<- +mSamplerThread + 10.0;90.0;10.0;10.0 + + + UMLNote + + 500 + 160 + 510 + 100 + + bg=red +This document pre-dates the generated image profilerclasses-20220913.png! +Unfortunately, the changes to make the image were lost. + +This previous version may still be useful to start reconstructing the image, +if there is a need to update it. + + + diff --git a/tools/profiler/docs/profilerthreadregistration-20220913.png b/tools/profiler/docs/profilerthreadregistration-20220913.png new file mode 100644 index 0000000000..8f7049d743 Binary files /dev/null and b/tools/profiler/docs/profilerthreadregistration-20220913.png differ diff --git a/tools/profiler/docs/profilerthreadregistration.umlet.uxf b/tools/profiler/docs/profilerthreadregistration.umlet.uxf new file mode 100644 index 0000000000..3e07215db4 --- /dev/null +++ b/tools/profiler/docs/profilerthreadregistration.umlet.uxf @@ -0,0 +1,710 @@ + + + 10 + + UMLClass + + 200 + 330 + 370 + 250 + + ThreadRegistry::OffThreadRef +-- ++UnlockedConstReaderCRef() const ++WithUnlockedConstReader(F&& aF) const ++UnlockedConstReaderAndAtomicRWCRef() const ++WithUnlockedConstReaderAndAtomicRW(F&& aF) const ++UnlockedConstReaderAndAtomicRWRef() ++WithUnlockedConstReaderAndAtomicRW(F&& aF) ++UnlockedRWForLockedProfilerCRef() ++WithUnlockedRWForLockedProfiler(F&& aF) ++UnlockedRWForLockedProfilerRef() ++WithUnlockedRWForLockedProfiler(F&& aF) 
++ConstLockedRWFromAnyThread() ++WithConstLockedRWFromAnyThread(F&& aF) ++LockedRWFromAnyThread() ++WithLockedRWFromAnyThread(F&& aF) + + + + UMLClass + + 310 + 80 + 560 + 160 + + ThreadRegistry +-- +-sRegistryMutex: RegistryMutex (aka BaseProfilerSharedMutex) +/exclusive lock used during un/registration, shared lock for other accesses/ +-- +friend class ThreadRegistration +-Register(ThreadRegistration::OnThreadRef) +-Unregister(ThreadRegistration::OnThreadRef) +-- ++WithOffThreadRef(ProfilerThreadId, auto&& aF) static ++WithOffThreadRefOr(ProfilerThreadId, auto&& aF, auto&& aFallbackReturn) static: auto + + + + UMLClass + + 310 + 630 + 530 + 260 + + ThreadRegistration +-- +-mDataMutex: DataMutex (aka BaseProfilerMutex) +-mIsOnHeap: bool +-mIsRegistryLockedSharedOnThisThread: bool +-tlsThreadRegistration: MOZ_THREAD_LOCAL(ThreadRegistration*) +-GetTLS() static: tlsThreadRegistration* +-GetFromTLS() static: ThreadRegistration* +-- ++ThreadRegistration(const char* aName, const void* aStackTop) ++~ThreadRegistration() ++RegisterThread(const char* aName, const void* aStackTop) static: ProfilingStack* ++UnregisterThread() static ++IsRegistered() static: bool ++GetOnThreadPtr() static OnThreadPtr ++WithOnThreadRefOr(auto&& aF, auto&& aFallbackReturn) static: auto ++IsDataMutexLockedOnCurrentThread() static: bool + + + + UMLClass + + 880 + 620 + 450 + 290 + + ThreadRegistration::OnThreadRef +-- ++UnlockedConstReaderCRef() const ++WithUnlockedConstReader(auto&& aF) const: auto ++UnlockedConstReaderAndAtomicRWCRef() const ++WithUnlockedConstReaderAndAtomicRW(auto&& aF) const: auto ++UnlockedConstReaderAndAtomicRWRef() ++WithUnlockedConstReaderAndAtomicRW(auto&& aF): auto ++UnlockedRWForLockedProfilerCRef() const ++WithUnlockedRWForLockedProfiler(auto&& aF) const: auto ++UnlockedRWForLockedProfilerRef() ++WithUnlockedRWForLockedProfiler(auto&& aF): auto ++UnlockedReaderAndAtomicRWOnThreadCRef() const ++WithUnlockedReaderAndAtomicRWOnThread(auto&& aF) const: auto 
++UnlockedReaderAndAtomicRWOnThreadRef() ++WithUnlockedReaderAndAtomicRWOnThread(auto&& aF): auto ++RWOnThreadWithLock LockedRWOnThread() ++WithLockedRWOnThread(auto&& aF): auto + + + + UMLClass + + 1040 + 440 + 230 + 70 + + ThreadRegistration::OnThreadPtr +-- ++operator*(): OnThreadRef ++operator->(): OnThreadRef + + + + UMLClass + + 450 + 940 + 350 + 240 + + ThreadRegistrationData +-- +-mProfilingStack: ProfilingStack +-mStackTop: const void* const +-mThread: nsCOMPtr<nsIThread> +-mJSContext: JSContext* +-mJsFrameBuffer: JsFrame* +-mJSFlags: uint32_t +-Sleep: Atomic<int> +-mThreadCpuTimeInNsAtLastSleep: Atomic<uint64_t> +-mWakeCount: Atomic<uint64_t, Relaxed> +-mRecordWakeCountMutex: BaseProfilerMutex +-mAlreadyRecordedWakeCount: uint64_t +-mAlreadyRecordedCpuTimeInMs: uin64_t +-mThreadProfilingFeatures: ThreadProfilingFeatures + + + + UMLClass + + 460 + 1220 + 330 + 80 + + ThreadRegistrationUnlockedConstReader +-- ++Info() const: const ThreadRegistrationInfo& ++PlatformDataCRef() const: const PlatformData& ++StackTop() const: const void* + + + + UMLClass + + 440 + 1340 + 370 + 190 + + ThreadRegistrationUnlockedConstReaderAndAtomicRW +-- ++ProfilingStackCRef() const: const ProfilingStack& ++ProfilingStackRef(): ProfilingStack& ++ProfilingFeatures() const: ThreadProfilingFeatures ++SetSleeping() ++SetAwake() ++GetNewCpuTimeInNs(): uint64_t ++RecordWakeCount() const ++ReinitializeOnResume() ++CanDuplicateLastSampleDueToSleep(): bool ++IsSleeping(): bool + + + + UMLClass + + 460 + 1570 + 330 + 60 + + ThreadRegistrationUnlockedRWForLockedProfiler +-- ++GetProfiledThreadData(): const ProfiledThreadData* ++GetProfiliedThreadData(): ProfiledThreadData* + + + + UMLClass + + 430 + 1670 + 390 + 50 + + ThreadRegistrationUnlockedReaderAndAtomicRWOnThread +-- ++GetJSContext(): JSContext* + + + + UMLClass + + 380 + 1840 + 490 + 190 + + ThreadRegistrationLockedRWFromAnyThread +-- ++SetProfilingFeaturesAndData( + ThreadProfilingFeatures, ProfiledThreadData*, const PSAutoLock&) 
++ClearProfilingFeaturesAndData(const PSAutoLock&) ++GetJsFrameBuffer() const JsFrame* ++GetEventTarget() const: const nsCOMPtr<nsIEventTarget> ++ResetMainThread() ++GetRunningEventDelay(const TimeStamp&, TimeDuration&, TimeDuration&) ++StartJSSampling(uint32_t) ++StopJSSampling() + + + + UMLClass + + 490 + 2070 + 260 + 80 + + ThreadRegistrationLockedRWOnThread +-- ++SetJSContext(JSContext*) ++ClearJSContext() ++PollJSSampling() + + + + Relation + + 610 + 1170 + 30 + 70 + + lt=<<- + 10.0;10.0;10.0;50.0 + + + UMLClass + + 500 + 2190 + 240 + 60 + + ThreadRegistration::EmbeddedData +-- + + + + Relation + + 610 + 1290 + 30 + 70 + + lt=<<- + 10.0;10.0;10.0;50.0 + + + Relation + + 610 + 1520 + 30 + 70 + + lt=<<- + 10.0;10.0;10.0;50.0 + + + Relation + + 610 + 1620 + 30 + 70 + + lt=<<- + 10.0;10.0;10.0;50.0 + + + Relation + + 650 + 1710 + 30 + 150 + + lt=<<- + 10.0;10.0;10.0;130.0 + + + Relation + + 610 + 2020 + 30 + 70 + + lt=<<- + 10.0;10.0;10.0;50.0 + + + Relation + + 610 + 2140 + 30 + 70 + + lt=<<- + 10.0;10.0;10.0;50.0 + + + Relation + + 340 + 880 + 180 + 1370 + + lt=->>>>> +mData + 160.0;1350.0;10.0;1350.0;10.0;10.0 + + + UMLClass + + 990 + 930 + 210 + 100 + + ThreadRegistrationInfo +-- ++Name(): const char* ++RegisterTime(): const TimeStamp& ++ThreadId(): ProfilerThreadId ++IsMainThread(): bool + + + + Relation + + 790 + 980 + 220 + 40 + + lt=->>>>> +mInfo + 200.0;20.0;10.0;20.0 + + + UMLClass + + 990 + 1040 + 210 + 50 + + PlatformData +-- + + + + + Relation + + 790 + 1040 + 220 + 40 + + lt=->>>>> +mPlatformData + 200.0;20.0;10.0;20.0 + + + UMLClass + + 990 + 1100 + 210 + 60 + + ProfiledThreadData +-- + + + + Relation + + 790 + 1100 + 220 + 40 + + lt=->>>> +mProfiledThreadData: * + 200.0;20.0;10.0;20.0 + + + Relation + + 710 + 480 + 350 + 170 + + lt=->>>> +m1=0..1 +mThreadRegistration: * + 10.0;150.0;330.0;10.0 + + + Relation + + 830 + 580 + 260 + 130 + + lt=->>>> +m1=1 +mThreadRegistration: * + 10.0;110.0;40.0;20.0;220.0;20.0;240.0;40.0 + + + Relation + + 1140 + 
500 + 90 + 140 + + lt=<. +<creates> + 10.0;120.0;10.0;10.0 + + + Relation + + 780 + 900 + 450 + 380 + + lt=<. +<accesses> + 10.0;360.0;430.0;360.0;430.0;10.0 + + + Relation + + 800 + 900 + 510 + 560 + + lt=<. +<accesses> + 10.0;540.0;420.0;540.0;420.0;10.0 + + + Relation + + 780 + 900 + 540 + 720 + + lt=<. +<accesses> + 10.0;700.0;450.0;700.0;450.0;10.0 + + + Relation + + 810 + 900 + 520 + 820 + + lt=<. +<accesses> + 10.0;800.0;430.0;800.0;430.0;10.0 + + + UMLClass + + 900 + 2070 + 410 + 80 + + ThreadRegistration::OnThreadRef::ConstRWOnThreadWithLock +-- +-mDataLock: BaseProfilerAutoLock +-- ++DataCRef() const: ThreadRegistrationLockedRWOnThread& ++operator->() const: ThreadRegistrationLockedRWOnThread& + + + + Relation + + 740 + 2100 + 180 + 40 + + lt=->>>> +mLockedRWOnThread + 10.0;20.0;160.0;20.0 + + + Relation + + 1250 + 900 + 90 + 1190 + + lt=<. +<creates> + 10.0;1170.0;10.0;10.0 + + + Relation + + 660 + 440 + 400 + 210 + + lt=<. +<creates> + 380.0;10.0;10.0;190.0 + + + Relation + + 740 + 880 + 160 + 50 + + lt=<. +<creates> + 140.0;30.0;50.0;30.0;10.0;10.0 + + + Relation + + 460 + 230 + 150 + 120 + + lt=->>>> +m1=0..N +sRegistryContainer: +static Vector<> + 10.0;100.0;10.0;10.0 + + + UMLClass + + 800 + 250 + 470 + 150 + + ThreadRegistry::LockedRegistry +-- +-mRegistryLock: RegistryLockShared (aka BaseProfilerAutoLockShared) +-- ++LockedRegistry() ++~LockedRegistry() ++begin() const: const OffThreadRef* ++end() const: const OffThreadRef* ++begin(): OffThreadRef* ++end(): OffThreadRef* + + + + Relation + + 560 + 350 + 260 + 50 + + lt=<. +<accesses with +shared lock> + 10.0;20.0;240.0;20.0 + + + Relation + + 550 + 390 + 330 + 260 + + lt=<. +<updates +mIsRegistryLockedSharedOnThisThread> + 10.0;240.0;310.0;10.0 + + + Relation + + 330 + 570 + 170 + 80 + + lt=->>>> +m1=1 +mThreadRegistration: * + 120.0;60.0;40.0;10.0 + + + Relation + + 280 + 570 + 200 + 710 + + lt=<. +<accesses> + 180.0;690.0;10.0;690.0;10.0;10.0 + + + Relation + + 270 + 570 + 190 + 890 + + lt=<. 
+<accesses> + 170.0;870.0;10.0;870.0;10.0;10.0 + + + UMLClass + + 200 + 1740 + 440 + 80 + + ThreadRegistry::OffThreadRef::{,Const}RWFromAnyThreadWithLock +-- +-mDataLock: BaseProfilerAutoLock +-- ++DataCRef() {,const}: ThreadRegistrationLockedRWOnThread& ++operator->() {,const}: ThreadRegistrationLockedRWOnThread& + + + + Relation + + 250 + 570 + 90 + 1190 + + lt=<. +<creates> + 10.0;1170.0;10.0;10.0 + + + Relation + + 180 + 1810 + 220 + 120 + + lt=->>>> +mLockedRWFromAnyThread + 200.0;100.0;80.0;100.0;80.0;10.0 + + diff --git a/tools/profiler/gecko/ChildProfilerController.cpp b/tools/profiler/gecko/ChildProfilerController.cpp new file mode 100644 index 0000000000..f51cb9437d --- /dev/null +++ b/tools/profiler/gecko/ChildProfilerController.cpp @@ -0,0 +1,170 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ChildProfilerController.h" + +#include "ProfilerChild.h" + +#include "mozilla/ProfilerState.h" +#include "mozilla/ipc/Endpoint.h" +#include "nsExceptionHandler.h" +#include "nsIThread.h" +#include "nsThreadUtils.h" + +using namespace mozilla::ipc; + +namespace mozilla { + +/* static */ +already_AddRefed ChildProfilerController::Create( + mozilla::ipc::Endpoint&& aEndpoint) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + RefPtr cpc = new ChildProfilerController(); + cpc->Init(std::move(aEndpoint)); + return cpc.forget(); +} + +ChildProfilerController::ChildProfilerController() + : mThread(nullptr, "ChildProfilerController::mThread") { + MOZ_COUNT_CTOR(ChildProfilerController); +} + +void ChildProfilerController::Init(Endpoint&& aEndpoint) { + RefPtr newProfilerChildThread; + if (NS_SUCCEEDED(NS_NewNamedThread("ProfilerChild", + getter_AddRefs(newProfilerChildThread)))) { + { + auto lock = mThread.Lock(); + RefPtr& lockedmThread = lock.ref(); + MOZ_ASSERT(!lockedmThread, "There is already a ProfilerChild thread"); + // Copy ref'd ptr into mThread. Don't move/swap, so that + // newProfilerChildThread can be used below. + lockedmThread = newProfilerChildThread; + } + // Now that mThread has been set, run SetupProfilerChild on the thread. 
+ newProfilerChildThread->Dispatch( + NewRunnableMethod&&>( + "ChildProfilerController::SetupProfilerChild", this, + &ChildProfilerController::SetupProfilerChild, std::move(aEndpoint)), + NS_DISPATCH_NORMAL); + } +} + +ProfileAndAdditionalInformation +ChildProfilerController::GrabShutdownProfileAndShutdown() { + ProfileAndAdditionalInformation profileAndAdditionalInformation; + ShutdownAndMaybeGrabShutdownProfileFirst(&profileAndAdditionalInformation); + return profileAndAdditionalInformation; +} + +void ChildProfilerController::Shutdown() { + ShutdownAndMaybeGrabShutdownProfileFirst(nullptr); +} + +void ChildProfilerController::ShutdownAndMaybeGrabShutdownProfileFirst( + ProfileAndAdditionalInformation* aOutShutdownProfileInformation) { + // First, get the owning reference out of mThread, so it cannot be used in + // ChildProfilerController after this (including re-entrantly during the + // profilerChildThread->Shutdown() inner event loop below). + RefPtr profilerChildThread; + { + auto lock = mThread.Lock(); + RefPtr& lockedmThread = lock.ref(); + lockedmThread.swap(profilerChildThread); + } + if (profilerChildThread) { + if (profiler_is_active()) { + CrashReporter::AnnotateCrashReport( + CrashReporter::Annotation::ProfilerChildShutdownPhase, + "Profiling - Dispatching ShutdownProfilerChild"_ns); + profilerChildThread->Dispatch( + NewRunnableMethod( + "ChildProfilerController::ShutdownProfilerChild", this, + &ChildProfilerController::ShutdownProfilerChild, + aOutShutdownProfileInformation), + NS_DISPATCH_NORMAL); + // Shut down the thread. This call will spin until all runnables + // (including the ShutdownProfilerChild runnable) have been processed. + profilerChildThread->Shutdown(); + } else { + CrashReporter::AnnotateCrashReport( + CrashReporter::Annotation::ProfilerChildShutdownPhase, + "Not profiling - Running ShutdownProfilerChild"_ns); + // If we're not profiling, this operation will be very quick, so it can be + // done synchronously. 
This avoids having to manually shutdown the thread, + // which runs a risky inner event loop, see bug 1613798. + NS_DispatchAndSpinEventLoopUntilComplete( + "ChildProfilerController::ShutdownProfilerChild SYNC"_ns, + profilerChildThread, + NewRunnableMethod( + "ChildProfilerController::ShutdownProfilerChild SYNC", this, + &ChildProfilerController::ShutdownProfilerChild, nullptr)); + } + // At this point, `profilerChildThread` should be the last reference to the + // thread, so it will now get destroyed. + } +} + +ChildProfilerController::~ChildProfilerController() { + MOZ_COUNT_DTOR(ChildProfilerController); + +#ifdef DEBUG + { + auto lock = mThread.Lock(); + RefPtr& lockedmThread = lock.ref(); + MOZ_ASSERT( + !lockedmThread, + "Please call Shutdown before destroying ChildProfilerController"); + } +#endif + MOZ_ASSERT(!mProfilerChild); +} + +void ChildProfilerController::SetupProfilerChild( + Endpoint&& aEndpoint) { + { + auto lock = mThread.Lock(); + RefPtr& lockedmThread = lock.ref(); + // We should be on the ProfilerChild thread. In rare cases, we could already + // be in shutdown, in which case mThread is null; we still need to continue, + // so that ShutdownProfilerChild can work on a valid mProfilerChild. + MOZ_RELEASE_ASSERT(!lockedmThread || + lockedmThread == NS_GetCurrentThread()); + } + MOZ_ASSERT(aEndpoint.IsValid()); + + mProfilerChild = new ProfilerChild(); + Endpoint endpoint = std::move(aEndpoint); + + if (!endpoint.Bind(mProfilerChild)) { + MOZ_CRASH("Failed to bind ProfilerChild!"); + } +} + +void ChildProfilerController::ShutdownProfilerChild( + ProfileAndAdditionalInformation* aOutShutdownProfileInformation) { + const bool isProfiling = profiler_is_active(); + if (aOutShutdownProfileInformation) { + CrashReporter::AnnotateCrashReport( + CrashReporter::Annotation::ProfilerChildShutdownPhase, + isProfiling ? 
"Profiling - GrabShutdownProfile"_ns + : "Not profiling - GrabShutdownProfile"_ns); + *aOutShutdownProfileInformation = mProfilerChild->GrabShutdownProfile(); + } + CrashReporter::AnnotateCrashReport( + CrashReporter::Annotation::ProfilerChildShutdownPhase, + isProfiling ? "Profiling - Destroying ProfilerChild"_ns + : "Not profiling - Destroying ProfilerChild"_ns); + mProfilerChild->Destroy(); + mProfilerChild = nullptr; + CrashReporter::AnnotateCrashReport( + CrashReporter::Annotation::ProfilerChildShutdownPhase, + isProfiling + ? "Profiling - ShutdownProfilerChild complete, waiting for thread shutdown"_ns + : "Not Profiling - ShutdownProfilerChild complete, waiting for thread shutdown"_ns); +} + +} // namespace mozilla diff --git a/tools/profiler/gecko/PProfiler.ipdl b/tools/profiler/gecko/PProfiler.ipdl new file mode 100644 index 0000000000..65778b892c --- /dev/null +++ b/tools/profiler/gecko/PProfiler.ipdl @@ -0,0 +1,44 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +include ProfilerTypes; + +namespace mozilla { + +// PProfiler is a top-level protocol. It is used to let the main process +// control the Gecko Profiler in other processes, and request profiles from +// those processes. +// It is a top-level protocol so that its child endpoint can be on a +// background thread, so that profiles can be gathered even if the main thread +// is unresponsive. +[ChildImpl=virtual, ParentImpl=virtual] +async protocol PProfiler +{ +child: + // The unused returned value is to have a promise we can await. 
+ async Start(ProfilerInitParams params) returns (bool unused); + async EnsureStarted(ProfilerInitParams params) returns (bool unused); + async Stop() returns (bool unused); + async Pause() returns (bool unused); + async Resume() returns (bool unused); + async PauseSampling() returns (bool unused); + async ResumeSampling() returns (bool unused); + + async WaitOnePeriodicSampling() returns (bool sampled); + + async AwaitNextChunkManagerUpdate() returns (ProfileBufferChunkManagerUpdate update); + async DestroyReleasedChunksAtOrBefore(TimeStamp timeStamp); + + // The returned shmem may contain an empty string (unavailable), an error + // message starting with '*', or a profile as a stringified JSON object. + async GatherProfile() returns (IPCProfileAndAdditionalInformation profileAndAdditionalInformation); + async GetGatherProfileProgress() returns (GatherProfileProgress progress); + + async ClearAllPages(); +}; + +} // namespace mozilla + diff --git a/tools/profiler/gecko/ProfilerChild.cpp b/tools/profiler/gecko/ProfilerChild.cpp new file mode 100644 index 0000000000..db7ef99423 --- /dev/null +++ b/tools/profiler/gecko/ProfilerChild.cpp @@ -0,0 +1,565 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfilerChild.h" + +#include "GeckoProfiler.h" +#include "platform.h" +#include "ProfilerCodeAddressService.h" +#include "ProfilerControl.h" +#include "ProfilerParent.h" + +#include "chrome/common/ipc_channel.h" +#include "nsPrintfCString.h" +#include "nsThreadUtils.h" + +#include + +namespace mozilla { + +/* static */ DataMutexBase + ProfilerChild::sPendingChunkManagerUpdate{ + "ProfilerChild::sPendingChunkManagerUpdate"}; + +ProfilerChild::ProfilerChild() + : mThread(NS_GetCurrentThread()), mDestroyed(false) { + MOZ_COUNT_CTOR(ProfilerChild); +} + +ProfilerChild::~ProfilerChild() { MOZ_COUNT_DTOR(ProfilerChild); } + +void ProfilerChild::ResolveChunkUpdate( + PProfilerChild::AwaitNextChunkManagerUpdateResolver& aResolve) { + MOZ_ASSERT(!!aResolve, + "ResolveChunkUpdate should only be called when there's a pending " + "resolver"); + MOZ_ASSERT( + !mChunkManagerUpdate.IsNotUpdate(), + "ResolveChunkUpdate should only be called with a real or final update"); + MOZ_ASSERT( + !mDestroyed, + "ResolveChunkUpdate should not be called if the actor was destroyed"); + if (mChunkManagerUpdate.IsFinal()) { + // Final update, send a special "unreleased value", but don't clear the + // local copy so we know we got the final update. + std::move(aResolve)(ProfilerParent::MakeFinalUpdate()); + } else { + // Optimization note: The ProfileBufferChunkManagerUpdate constructor takes + // the newly-released chunks nsTArray by reference-to-const, therefore + // constructing and then moving the array here would make a copy. So instead + // we first give it an empty array, and then we can write the data directly + // into the update's array. 
+ ProfileBufferChunkManagerUpdate update{ + mChunkManagerUpdate.UnreleasedBytes(), + mChunkManagerUpdate.ReleasedBytes(), + mChunkManagerUpdate.OldestDoneTimeStamp(), + {}}; + update.newlyReleasedChunks().SetCapacity( + mChunkManagerUpdate.NewlyReleasedChunksRef().size()); + for (const ProfileBufferControlledChunkManager::ChunkMetadata& chunk : + mChunkManagerUpdate.NewlyReleasedChunksRef()) { + update.newlyReleasedChunks().EmplaceBack(chunk.mDoneTimeStamp, + chunk.mBufferBytes); + } + + std::move(aResolve)(update); + + // Clear the update we just sent, so it's ready for later updates to be + // folded into it. + mChunkManagerUpdate.Clear(); + } + + // Discard the resolver, so it's empty next time there's a new request. + aResolve = nullptr; +} + +void ProfilerChild::ProcessChunkManagerUpdate( + ProfileBufferControlledChunkManager::Update&& aUpdate) { + if (mDestroyed) { + return; + } + // Always store the data, it could be the final update. + mChunkManagerUpdate.Fold(std::move(aUpdate)); + if (mAwaitNextChunkManagerUpdateResolver) { + // There is already a pending resolver, give it the info now. 
+ ResolveChunkUpdate(mAwaitNextChunkManagerUpdateResolver); + } +} + +/* static */ void ProfilerChild::ProcessPendingUpdate() { + auto lockedUpdate = sPendingChunkManagerUpdate.Lock(); + if (!lockedUpdate->mProfilerChild || lockedUpdate->mUpdate.IsNotUpdate()) { + return; + } + lockedUpdate->mProfilerChild->mThread->Dispatch(NS_NewRunnableFunction( + "ProfilerChild::ProcessPendingUpdate", []() mutable { + auto lockedUpdate = sPendingChunkManagerUpdate.Lock(); + if (!lockedUpdate->mProfilerChild || + lockedUpdate->mUpdate.IsNotUpdate()) { + return; + } + lockedUpdate->mProfilerChild->ProcessChunkManagerUpdate( + std::move(lockedUpdate->mUpdate)); + lockedUpdate->mUpdate.Clear(); + })); +} + +/* static */ bool ProfilerChild::IsLockedOnCurrentThread() { + return sPendingChunkManagerUpdate.Mutex().IsLockedOnCurrentThread(); +} + +void ProfilerChild::SetupChunkManager() { + mChunkManager = profiler_get_controlled_chunk_manager(); + if (NS_WARN_IF(!mChunkManager)) { + return; + } + + // Make sure there are no updates (from a previous run). + mChunkManagerUpdate.Clear(); + { + auto lockedUpdate = sPendingChunkManagerUpdate.Lock(); + lockedUpdate->mProfilerChild = this; + lockedUpdate->mUpdate.Clear(); + } + + mChunkManager->SetUpdateCallback( + [](ProfileBufferControlledChunkManager::Update&& aUpdate) { + // Updates from the chunk manager are stored for later processing. + // We avoid dispatching a task, as this could deadlock (if the queueing + // mutex is held elsewhere). + auto lockedUpdate = sPendingChunkManagerUpdate.Lock(); + if (!lockedUpdate->mProfilerChild) { + return; + } + lockedUpdate->mUpdate.Fold(std::move(aUpdate)); + }); +} + +void ProfilerChild::ResetChunkManager() { + if (!mChunkManager) { + return; + } + + // We have a chunk manager, reset the callback, which will add a final + // pending update. + mChunkManager->SetUpdateCallback({}); + + // Clear the pending update. 
+ auto lockedUpdate = sPendingChunkManagerUpdate.Lock(); + lockedUpdate->mProfilerChild = nullptr; + lockedUpdate->mUpdate.Clear(); + // And process a final update right now. + ProcessChunkManagerUpdate( + ProfileBufferControlledChunkManager::Update(nullptr)); + + mChunkManager = nullptr; + mAwaitNextChunkManagerUpdateResolver = nullptr; +} + +mozilla::ipc::IPCResult ProfilerChild::RecvStart( + const ProfilerInitParams& params, StartResolver&& aResolve) { + nsTArray filterArray; + for (size_t i = 0; i < params.filters().Length(); ++i) { + filterArray.AppendElement(params.filters()[i].get()); + } + + profiler_start(PowerOfTwo32(params.entries()), params.interval(), + params.features(), filterArray.Elements(), + filterArray.Length(), params.activeTabID(), params.duration()); + + SetupChunkManager(); + + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvEnsureStarted( + const ProfilerInitParams& params, EnsureStartedResolver&& aResolve) { + nsTArray filterArray; + for (size_t i = 0; i < params.filters().Length(); ++i) { + filterArray.AppendElement(params.filters()[i].get()); + } + + profiler_ensure_started(PowerOfTwo32(params.entries()), params.interval(), + params.features(), filterArray.Elements(), + filterArray.Length(), params.activeTabID(), + params.duration()); + + SetupChunkManager(); + + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvStop(StopResolver&& aResolve) { + ResetChunkManager(); + profiler_stop(); + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvPause(PauseResolver&& aResolve) { + profiler_pause(); + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvResume(ResumeResolver&& aResolve) { + profiler_resume(); + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvPauseSampling( + PauseSamplingResolver&& aResolve) { + 
profiler_pause_sampling(); + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvResumeSampling( + ResumeSamplingResolver&& aResolve) { + profiler_resume_sampling(); + aResolve(/* unused */ true); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvWaitOnePeriodicSampling( + WaitOnePeriodicSamplingResolver&& aResolve) { + std::shared_ptr resolve = + std::make_shared(std::move(aResolve)); + if (!profiler_callback_after_sampling( + [self = RefPtr(this), resolve](SamplingState aSamplingState) mutable { + if (self->mDestroyed) { + return; + } + MOZ_RELEASE_ASSERT(self->mThread); + self->mThread->Dispatch(NS_NewRunnableFunction( + "nsProfiler::WaitOnePeriodicSampling result on main thread", + [resolve = std::move(resolve), aSamplingState]() { + (*resolve)(aSamplingState == + SamplingState::SamplingCompleted || + aSamplingState == + SamplingState::NoStackSamplingCompleted); + })); + })) { + // Callback was not added (e.g., profiler is not running) and will never be + // invoked, so we need to resolve the promise here. + (*resolve)(false); + } + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvClearAllPages() { + profiler_clear_all_pages(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvAwaitNextChunkManagerUpdate( + AwaitNextChunkManagerUpdateResolver&& aResolve) { + MOZ_ASSERT(!mDestroyed, + "Recv... should not be called if the actor was destroyed"); + // Pick up pending updates if any. + { + auto lockedUpdate = sPendingChunkManagerUpdate.Lock(); + if (lockedUpdate->mProfilerChild && !lockedUpdate->mUpdate.IsNotUpdate()) { + mChunkManagerUpdate.Fold(std::move(lockedUpdate->mUpdate)); + lockedUpdate->mUpdate.Clear(); + } + } + if (mChunkManagerUpdate.IsNotUpdate()) { + // No data yet, store the resolver for later. + mAwaitNextChunkManagerUpdateResolver = std::move(aResolve); + } else { + // We have data, send it now. 
+ ResolveChunkUpdate(aResolve); + } + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvDestroyReleasedChunksAtOrBefore( + const TimeStamp& aTimeStamp) { + if (mChunkManager) { + mChunkManager->DestroyChunksAtOrBefore(aTimeStamp); + } + return IPC_OK(); +} + +struct GatherProfileThreadParameters + : public external::AtomicRefCounted { + MOZ_DECLARE_REFCOUNTED_TYPENAME(GatherProfileThreadParameters) + + GatherProfileThreadParameters( + RefPtr aProfilerChild, + RefPtr aProgress, + ProfilerChild::GatherProfileResolver&& aResolver) + : profilerChild(std::move(aProfilerChild)), + progress(std::move(aProgress)), + resolver(std::move(aResolver)) {} + + RefPtr profilerChild; + + FailureLatchSource failureLatchSource; + + // Separate RefPtr used when working on separate thread. This way, if the + // "ProfilerChild" thread decides to overwrite its mGatherProfileProgress with + // a new one, the work done here will still only use the old one. + RefPtr progress; + + // Resolver for the GatherProfile promise. Must only be called on the + // "ProfilerChild" thread. + ProfilerChild::GatherProfileResolver resolver; +}; + +/* static */ +void ProfilerChild::GatherProfileThreadFunction( + void* already_AddRefedParameters) { + PR_SetCurrentThreadName("GatherProfileThread"); + + RefPtr parameters = + already_AddRefed{ + static_cast( + already_AddRefedParameters)}; + + ProgressLogger progressLogger( + parameters->progress, "Gather-profile thread started", "Profile sent"); + using namespace mozilla::literals::ProportionValue_literals; // For `1_pc`. + + auto writer = + MakeUnique(parameters->failureLatchSource); + if (!profiler_get_profile_json( + *writer, + /* aSinceTime */ 0, + /* aIsShuttingDown */ false, + progressLogger.CreateSubLoggerFromTo( + 1_pc, "profiler_get_profile_json started", 99_pc, + "profiler_get_profile_json done"))) { + // Failed to get a profile, reset the writer pointer, so that we'll send a + // failure message. 
+ writer.reset(); + } + + if (NS_WARN_IF(NS_FAILED( + parameters->profilerChild->mThread->Dispatch(NS_NewRunnableFunction( + "ProfilerChild::ProcessPendingUpdate", + [parameters, + // Forward progress logger to on-ProfilerChild-thread task, so + // that it doesn't get marked as 100% done when this off-thread + // function ends. + progressLogger = std::move(progressLogger), + writer = std::move(writer)]() mutable { + // We are now on the ProfilerChild thread, about to send the + // completed profile. Any incoming progress request will now be + // handled after this task ends, so updating the progress is now + // useless and we can just get rid of the progress storage. + if (parameters->profilerChild->mGatherProfileProgress == + parameters->progress) { + // The ProfilerChild progress is still the one we know. + parameters->profilerChild->mGatherProfileProgress = nullptr; + } + + // Shmem allocation and promise resolution must be made on the + // ProfilerChild thread, that's why this task was needed here. + mozilla::ipc::Shmem shmem; + if (writer) { + if (const size_t len = writer->ChunkedWriteFunc().Length(); + len < UINT32_MAX) { + bool shmemSuccess = true; + const bool copySuccess = + writer->ChunkedWriteFunc() + .CopyDataIntoLazilyAllocatedBuffer( + [&](size_t allocationSize) -> char* { + MOZ_ASSERT(allocationSize == len + 1); + if (parameters->profilerChild->AllocShmem( + allocationSize, &shmem)) { + return shmem.get(); + } + shmemSuccess = false; + return nullptr; + }); + if (!shmemSuccess || !copySuccess) { + const nsPrintfCString message( + (!shmemSuccess) + ? 
"*Could not create shmem for profile from pid " + "%u (%zu B)" + : "*Could not write profile from pid %u (%zu B)", + unsigned(profiler_current_process_id().ToNumber()), + len); + if (parameters->profilerChild->AllocShmem( + message.Length() + 1, &shmem)) { + strcpy(shmem.get(), message.Data()); + } + } + } else { + const nsPrintfCString message( + "*Profile from pid %u bigger (%zu) than shmem max " + "(%zu)", + unsigned(profiler_current_process_id().ToNumber()), len, + size_t(UINT32_MAX)); + if (parameters->profilerChild->AllocShmem( + message.Length() + 1, &shmem)) { + strcpy(shmem.get(), message.Data()); + } + } + writer = nullptr; + } else { + // No profile. + const char* failure = + parameters->failureLatchSource.GetFailure(); + const nsPrintfCString message( + "*Could not generate profile from pid %u%s%s", + unsigned(profiler_current_process_id().ToNumber()), + failure ? ", failure: " : "", failure ? failure : ""); + if (parameters->profilerChild->AllocShmem( + message.Length() + 1, &shmem)) { + strcpy(shmem.get(), message.Data()); + } + } + + SharedLibraryInfo sharedLibraryInfo = + SharedLibraryInfo::GetInfoForSelf(); + parameters->resolver(IPCProfileAndAdditionalInformation{ + shmem, Some(ProfileGenerationAdditionalInformation{ + std::move(sharedLibraryInfo)})}); + }))))) { + // Failed to dispatch the task to the ProfilerChild thread. The IPC cannot + // be resolved on this thread, so it will never be resolved! + // And it would be unsafe to modify mGatherProfileProgress; But the parent + // should notice that's it's not advancing anymore. 
+ } +} + +mozilla::ipc::IPCResult ProfilerChild::RecvGatherProfile( + GatherProfileResolver&& aResolve) { + mGatherProfileProgress = MakeRefPtr(); + mGatherProfileProgress->SetProgress(ProportionValue{0.0}, + "Received gather-profile request"); + + auto parameters = MakeRefPtr( + this, mGatherProfileProgress, std::move(aResolve)); + + // The GatherProfileThreadFunction thread function will cast its void* + // argument to already_AddRefed. + parameters.get()->AddRef(); + PRThread* gatherProfileThread = PR_CreateThread( + PR_SYSTEM_THREAD, GatherProfileThreadFunction, parameters.get(), + PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, 0); + + if (!gatherProfileThread) { + // Failed to create and start worker thread, resolve with an empty profile. + mozilla::ipc::Shmem shmem; + if (AllocShmem(1, &shmem)) { + shmem.get()[0] = '\0'; + } + parameters->resolver(IPCProfileAndAdditionalInformation{shmem, Nothing()}); + // And clean up. + parameters.get()->Release(); + mGatherProfileProgress = nullptr; + } + + return IPC_OK(); +} + +mozilla::ipc::IPCResult ProfilerChild::RecvGetGatherProfileProgress( + GetGatherProfileProgressResolver&& aResolve) { + if (mGatherProfileProgress) { + aResolve(GatherProfileProgress{ + mGatherProfileProgress->Progress().ToUnderlyingType(), + nsCString(mGatherProfileProgress->LastLocation())}); + } else { + aResolve( + GatherProfileProgress{ProportionValue::MakeInvalid().ToUnderlyingType(), + nsCString("No gather-profile in progress")}); + } + return IPC_OK(); +} + +void ProfilerChild::ActorDestroy(ActorDestroyReason aActorDestroyReason) { + mDestroyed = true; +} + +void ProfilerChild::Destroy() { + ResetChunkManager(); + if (!mDestroyed) { + Close(); + } +} + +ProfileAndAdditionalInformation ProfilerChild::GrabShutdownProfile() { + LOG("GrabShutdownProfile"); + + UniquePtr service = + profiler_code_address_service_for_presymbolication(); + FailureLatchSource failureLatch; + SpliceableChunkedJSONWriter writer{failureLatch}; + 
writer.Start(); + auto rv = profiler_stream_json_for_this_process( + writer, /* aSinceTime */ 0, + /* aIsShuttingDown */ true, service.get(), ProgressLogger{}); + if (rv.isErr()) { + const char* failure = writer.GetFailure(); + return ProfileAndAdditionalInformation( + nsPrintfCString("*Profile unavailable for pid %u%s%s", + unsigned(profiler_current_process_id().ToNumber()), + failure ? ", failure: " : "", failure ? failure : "")); + } + + auto additionalInfo = rv.unwrap(); + + writer.StartArrayProperty("processes"); + writer.EndArray(); + writer.End(); + + const size_t len = writer.ChunkedWriteFunc().Length(); + // This string and information are destined to be sent as a shutdown profile, + // which is limited by the maximum IPC message size. + // TODO: IPC to change to shmem (bug 1780330), raising this limit to + // JS::MaxStringLength. + if (len + additionalInfo.SizeOf() >= + size_t(IPC::Channel::kMaximumMessageSize)) { + return ProfileAndAdditionalInformation( + nsPrintfCString("*Profile from pid %u bigger (%zu) than IPC max (%zu)", + unsigned(profiler_current_process_id().ToNumber()), len, + size_t(IPC::Channel::kMaximumMessageSize))); + } + + nsCString profileCString; + if (!profileCString.SetLength(len, fallible)) { + return ProfileAndAdditionalInformation(nsPrintfCString( + "*Could not allocate %zu bytes for profile from pid %u", len, + unsigned(profiler_current_process_id().ToNumber()))); + } + MOZ_ASSERT(*(profileCString.Data() + len) == '\0', + "We expected a null at the end of the string buffer, to be " + "rewritten by CopyDataIntoLazilyAllocatedBuffer"); + + char* const profileBeginWriting = profileCString.BeginWriting(); + if (!profileBeginWriting) { + return ProfileAndAdditionalInformation( + nsPrintfCString("*Could not write profile from pid %u", + unsigned(profiler_current_process_id().ToNumber()))); + } + + // Here, we have enough space reserved in `profileCString`, starting at + // `profileBeginWriting`, copy the JSON profile there. 
+ if (!writer.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer( + [&](size_t aBufferLen) -> char* { + MOZ_RELEASE_ASSERT(aBufferLen == len + 1); + return profileBeginWriting; + })) { + return ProfileAndAdditionalInformation( + nsPrintfCString("*Could not copy profile from pid %u", + unsigned(profiler_current_process_id().ToNumber()))); + } + MOZ_ASSERT(*(profileCString.Data() + len) == '\0', + "We still expected a null at the end of the string buffer"); + + return ProfileAndAdditionalInformation{std::move(profileCString), + std::move(additionalInfo)}; +} + +} // namespace mozilla diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp new file mode 100644 index 0000000000..cf33789f69 --- /dev/null +++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp @@ -0,0 +1,216 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfilerIOInterposeObserver.h" +#include "GeckoProfiler.h" + +using namespace mozilla; + +/* static */ +ProfilerIOInterposeObserver& ProfilerIOInterposeObserver::GetInstance() { + static ProfilerIOInterposeObserver sProfilerIOInterposeObserver; + return sProfilerIOInterposeObserver; +} + +namespace geckoprofiler::markers { +struct FileIOMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("FileIO"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + const ProfilerString8View& aOperation, + const ProfilerString8View& aSource, + const ProfilerString8View& aFilename, + MarkerThreadId aOperationThreadId) { + aWriter.StringProperty("operation", aOperation); + aWriter.StringProperty("source", aSource); + if (aFilename.Length() != 0) { + aWriter.StringProperty("filename", aFilename); + } + if (!aOperationThreadId.IsUnspecified()) { + // Tech note: If `ToNumber()` returns a uint64_t, the conversion to + // int64_t is "implementation-defined" before C++20. This is acceptable + // here, because this is a one-way conversion to a unique identifier + // that's used to visually separate data by thread on the front-end. 
+ aWriter.IntProperty( + "threadId", + static_cast(aOperationThreadId.ThreadId().ToNumber())); + } + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable, + MS::Location::TimelineFileIO}; + schema.AddKeyLabelFormatSearchable("operation", "Operation", + MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyLabelFormatSearchable("source", "Source", MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyLabelFormatSearchable("filename", "Filename", + MS::Format::FilePath, + MS::Searchable::Searchable); + schema.AddKeyLabelFormatSearchable("threadId", "Thread ID", + MS::Format::String, + MS::Searchable::Searchable); + return schema; + } +}; +} // namespace geckoprofiler::markers + +static auto GetFilename(IOInterposeObserver::Observation& aObservation) { + AUTO_PROFILER_STATS(IO_filename); + constexpr size_t scExpectedMaxFilename = 512; + nsAutoStringN filename16; + aObservation.Filename(filename16); + nsAutoCStringN filename8; + if (!filename16.IsEmpty()) { + CopyUTF16toUTF8(filename16, filename8); + } + return filename8; +} + +void ProfilerIOInterposeObserver::Observe(Observation& aObservation) { + if (profiler_is_locked_on_current_thread()) { + // Don't observe I/Os originating from the profiler itself (when internally + // locked) to avoid deadlocks when calling profiler functions. + AUTO_PROFILER_STATS(IO_profiler_locked); + return; + } + + Maybe maybeFeatures = profiler_features_if_active_and_unpaused(); + if (maybeFeatures.isNothing()) { + return; + } + uint32_t features = *maybeFeatures; + + if (!profiler_thread_is_being_profiled_for_markers( + profiler_main_thread_id()) && + !profiler_thread_is_being_profiled_for_markers()) { + return; + } + + AUTO_PROFILER_LABEL("ProfilerIOInterposeObserver", PROFILER); + if (IsMainThread()) { + // This is the main thread. + // Capture a marker if any "IO" feature is on. 
+ // If it's not being profiled, we have nowhere to store FileIO markers. + if (!profiler_thread_is_being_profiled_for_markers() || + !(features & ProfilerFeature::MainThreadIO)) { + return; + } + AUTO_PROFILER_STATS(IO_MT); + nsAutoCString type{aObservation.FileType()}; + type.AppendLiteral("IO"); + + // Store the marker in the current thread. + PROFILER_MARKER( + type, OTHER, + MarkerOptions( + MarkerTiming::Interval(aObservation.Start(), aObservation.End()), + MarkerStack::Capture()), + FileIOMarker, + // aOperation + ProfilerString8View::WrapNullTerminatedString( + aObservation.ObservedOperationString()), + // aSource + ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()), + // aFilename + GetFilename(aObservation), + // aOperationThreadId - Do not include a thread ID, as it's the same as + // the markers. Only include this field when the marker is being sent + // from another thread. + MarkerThreadId{}); + + } else if (profiler_thread_is_being_profiled_for_markers()) { + // This is a non-main thread that is being profiled. + if (!(features & ProfilerFeature::FileIO)) { + return; + } + AUTO_PROFILER_STATS(IO_off_MT); + + nsAutoCString type{aObservation.FileType()}; + type.AppendLiteral("IO"); + + // Share a backtrace between the marker on this thread, and the marker on + // the main thread. + UniquePtr backtrace = profiler_capture_backtrace(); + + // Store the marker in the current thread. + PROFILER_MARKER( + type, OTHER, + MarkerOptions( + MarkerTiming::Interval(aObservation.Start(), aObservation.End()), + backtrace ? MarkerStack::UseBacktrace(*backtrace) + : MarkerStack::NoStack()), + FileIOMarker, + // aOperation + ProfilerString8View::WrapNullTerminatedString( + aObservation.ObservedOperationString()), + // aSource + ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()), + // aFilename + GetFilename(aObservation), + // aOperationThreadId - Do not include a thread ID, as it's the same as + // the markers. 
Only include this field when the marker is being sent + // from another thread. + MarkerThreadId{}); + + // Store the marker in the main thread as well, with a distinct marker name + // and thread id. + type.AppendLiteral(" (non-main thread)"); + PROFILER_MARKER( + type, OTHER, + MarkerOptions( + MarkerTiming::Interval(aObservation.Start(), aObservation.End()), + backtrace ? MarkerStack::UseBacktrace(*backtrace) + : MarkerStack::NoStack(), + // This is the important piece that changed. + // It will send a marker to the main thread. + MarkerThreadId::MainThread()), + FileIOMarker, + // aOperation + ProfilerString8View::WrapNullTerminatedString( + aObservation.ObservedOperationString()), + // aSource + ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()), + // aFilename + GetFilename(aObservation), + // aOperationThreadId - Include the thread ID in the payload. + MarkerThreadId::CurrentThread()); + + } else { + // This is a thread that is not being profiled. We still want to capture + // file I/Os (to the main thread) if the "FileIOAll" feature is on. + if (!(features & ProfilerFeature::FileIOAll)) { + return; + } + AUTO_PROFILER_STATS(IO_other); + nsAutoCString type{aObservation.FileType()}; + if (profiler_is_active_and_thread_is_registered()) { + type.AppendLiteral("IO (non-profiled thread)"); + } else { + type.AppendLiteral("IO (unregistered thread)"); + } + + // Only store this marker on the main thread, as this thread was not being + // profiled. + PROFILER_MARKER( + type, OTHER, + MarkerOptions( + MarkerTiming::Interval(aObservation.Start(), aObservation.End()), + MarkerStack::Capture(), + // Store this marker on the main thread. 
+ MarkerThreadId::MainThread()), + FileIOMarker, + // aOperation + ProfilerString8View::WrapNullTerminatedString( + aObservation.ObservedOperationString()), + // aSource + ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()), + // aFilename + GetFilename(aObservation), + // aOperationThreadId - Note which thread this marker is coming from. + MarkerThreadId::CurrentThread()); + } +} diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.h b/tools/profiler/gecko/ProfilerIOInterposeObserver.h new file mode 100644 index 0000000000..9e22a34f15 --- /dev/null +++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.h @@ -0,0 +1,32 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILERIOINTERPOSEOBSERVER_H +#define PROFILERIOINTERPOSEOBSERVER_H + +#include "mozilla/IOInterposer.h" +#include "nsISupportsImpl.h" + +namespace mozilla { + +/** + * This class is the observer that calls into the profiler whenever + * main thread I/O occurs. 
+ */ +class ProfilerIOInterposeObserver final : public IOInterposeObserver { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ProfilerIOInterposeObserver) + + public: + static ProfilerIOInterposeObserver& GetInstance(); + + virtual void Observe(Observation& aObservation) override; + + private: + ProfilerIOInterposeObserver() = default; + virtual ~ProfilerIOInterposeObserver() {} +}; + +} // namespace mozilla + +#endif // PROFILERIOINTERPOSEOBSERVER_H diff --git a/tools/profiler/gecko/ProfilerParent.cpp b/tools/profiler/gecko/ProfilerParent.cpp new file mode 100644 index 0000000000..83bce6d982 --- /dev/null +++ b/tools/profiler/gecko/ProfilerParent.cpp @@ -0,0 +1,1002 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfilerParent.h" + +#ifdef MOZ_GECKO_PROFILER +# include "nsProfiler.h" +# include "platform.h" +#endif + +#include "GeckoProfiler.h" +#include "ProfilerControl.h" +#include "mozilla/BaseAndGeckoProfilerDetail.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/DataMutex.h" +#include "mozilla/IOInterposer.h" +#include "mozilla/ipc/Endpoint.h" +#include "mozilla/Maybe.h" +#include "mozilla/ProfileBufferControlledChunkManager.h" +#include "mozilla/RefPtr.h" +#include "mozilla/Unused.h" +#include "nsTArray.h" +#include "nsThreadUtils.h" + +#include + +namespace mozilla { + +using namespace ipc; + +/* static */ +Endpoint ProfilerParent::CreateForProcess( + base::ProcessId aOtherPid) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + Endpoint child; +#ifdef MOZ_GECKO_PROFILER + Endpoint parent; + nsresult rv = PProfiler::CreateEndpoints(&parent, &child); + + if (NS_FAILED(rv)) { + MOZ_CRASH("Failed to create top level actor for 
PProfiler!"); + } + + RefPtr actor = new ProfilerParent(aOtherPid); + if (!parent.Bind(actor)) { + MOZ_CRASH("Failed to bind parent actor for PProfiler!"); + } + + actor->Init(); +#endif + + return child; +} + +#ifdef MOZ_GECKO_PROFILER + +class ProfilerParentTracker; + +// This class is responsible for gathering updates from chunk managers in +// different processes, and for requesting that the oldest chunks be destroyed +// whenever the given memory limit is reached. +class ProfileBufferGlobalController final { + public: + explicit ProfileBufferGlobalController(size_t aMaximumBytes); + + ~ProfileBufferGlobalController(); + + void HandleChildChunkManagerUpdate( + base::ProcessId aProcessId, + ProfileBufferControlledChunkManager::Update&& aUpdate); + + static bool IsLockedOnCurrentThread(); // True if the mutex guarding sParentChunkManagerAndPendingUpdate is locked on this thread. + + private: + // Calls aF(Json::Value&). + template + void Log(F&& aF); + + static void LogUpdateChunks(Json::Value& updates, base::ProcessId aProcessId, + const TimeStamp& aTimeStamp, int aChunkDiff); + void LogUpdate(base::ProcessId aProcessId, + const ProfileBufferControlledChunkManager::Update& aUpdate); + void LogDeletion(base::ProcessId aProcessId, const TimeStamp& aTimeStamp); + + void HandleChunkManagerNonFinalUpdate( + base::ProcessId aProcessId, + ProfileBufferControlledChunkManager::Update&& aUpdate, + ProfileBufferControlledChunkManager& aParentChunkManager); + + const size_t mMaximumBytes; // Limit on the sum of mUnreleasedTotalBytes and mReleasedTotalBytes. + + const base::ProcessId mParentProcessId = base::GetCurrentProcId(); + + struct ParentChunkManagerAndPendingUpdate { + ProfileBufferControlledChunkManager* mChunkManager = nullptr; + ProfileBufferControlledChunkManager::Update mPendingUpdate; // Parent updates folded together until the next child update is handled. + }; + + static DataMutexBase + sParentChunkManagerAndPendingUpdate; + + size_t mUnreleasedTotalBytes = 0; + + struct PidAndBytes { + base::ProcessId mProcessId; + size_t mBytes; + + // For searching and sorting.
+ bool operator==(base::ProcessId aSearchedProcessId) const { + return mProcessId == aSearchedProcessId; + } + bool operator==(const PidAndBytes& aOther) const { + return mProcessId == aOther.mProcessId; + } + bool operator<(base::ProcessId aSearchedProcessId) const { + return mProcessId < aSearchedProcessId; + } + bool operator<(const PidAndBytes& aOther) const { + return mProcessId < aOther.mProcessId; + } + }; + using PidAndBytesArray = nsTArray; + PidAndBytesArray mUnreleasedBytesByPid; // Kept sorted by pid (InsertElementSorted, BinaryIndexOf). + + size_t mReleasedTotalBytes = 0; + + struct TimeStampAndBytesAndPid { + TimeStamp mTimeStamp; + size_t mBytes; + base::ProcessId mProcessId; + + // For searching and sorting. + bool operator==(const TimeStampAndBytesAndPid& aOther) const { + // Equal only when both the timestamp and the pid match; mBytes is not + // compared, which is consistent with the ordering in operator<. + return mTimeStamp == aOther.mTimeStamp && mProcessId == aOther.mProcessId; + } + bool operator<(const TimeStampAndBytesAndPid& aOther) const { + // Sort first by timestamps, and then by pid in rare cases with the same + // timestamps. + return mTimeStamp < aOther.mTimeStamp || + (MOZ_UNLIKELY(mTimeStamp == aOther.mTimeStamp) && + mProcessId < aOther.mProcessId); + } + }; + using TimeStampAndBytesAndPidArray = nsTArray; + TimeStampAndBytesAndPidArray mReleasedChunksByTime; // Kept sorted oldest-first; element 0 is the first to be destroyed. +}; + +/* static */ +DataMutexBase + ProfileBufferGlobalController::sParentChunkManagerAndPendingUpdate{ + "ProfileBufferGlobalController::sParentChunkManagerAndPendingUpdate"}; + +// This singleton class tracks live ProfilerParent's (meaning there's a current +// connection with a child process). +// It also knows when the local profiler is running. +// And when both the profiler is running and at least one child is present, it +// creates a ProfileBufferGlobalController and forwards chunk updates to it.
+class ProfilerParentTracker final { + public: + static void StartTracking(ProfilerParent* aParent); + static void StopTracking(ProfilerParent* aParent); + + static void ProfilerStarted(uint32_t aEntries); + static void ProfilerWillStopIfStarted(); // Resets mEntries to 0 and drops the controller. + + // Number of non-destroyed tracked ProfilerParents. + static size_t ProfilerParentCount(); + + template + static void Enumerate(FuncType&& aIterFunc); // Calls aIterFunc(ProfilerParent*) for every non-destroyed tracked parent. + + template + static void ForChild(base::ProcessId aChildPid, FuncType&& aIterFunc); // Calls aIterFunc at most once: for the first tracked parent with aChildPid, if it is not destroyed. + + static void ForwardChildChunkManagerUpdate( + base::ProcessId aProcessId, + ProfileBufferControlledChunkManager::Update&& aUpdate); + + ProfilerParentTracker(); + ~ProfilerParentTracker(); + + private: + // Get the singleton instance; Create one on the first request, unless we are + // past XPCOMShutdownThreads, which is when it should get destroyed. + static ProfilerParentTracker* GetInstance(); + + // List of parents for currently-connected child processes. + nsTArray mProfilerParents; + + // If non-0, the parent profiler is running, with this limit (in number of + // entries.) This is needed here, because the parent profiler may start + // running before child processes are known (e.g., startup profiling). + uint32_t mEntries = 0; // The controller is created with a byte limit of mEntries times 8. + + // When the profiler is running and there is at least one parent-child + // connection, this is the controller that should receive chunk updates.
+ Maybe mMaybeController; +}; + +static const Json::StaticString logRoot{"bufferGlobalController"}; + +template +void ProfileBufferGlobalController::Log(F&& aF) { + ProfilingLog::Access([&](Json::Value& aLog) { + Json::Value& root = aLog[logRoot]; + if (!root.isObject()) { + root = Json::Value(Json::objectValue); + root[Json::StaticString{"logBegin" TIMESTAMP_JSON_SUFFIX}] = + ProfilingLog::Timestamp(); + } + std::forward(aF)(root); // aF fills in the bufferGlobalController log object. + }); +} + +/* static */ +void ProfileBufferGlobalController::LogUpdateChunks(Json::Value& updates, + base::ProcessId aProcessId, + const TimeStamp& aTimeStamp, + int aChunkDiff) { + MOZ_ASSERT(updates.isArray()); + Json::Value row{Json::arrayValue}; + row.append(Json::Value{Json::UInt64(aProcessId)}); // 0: pid + row.append(ProfilingLog::Timestamp(aTimeStamp)); // 1: chunkRelease_TSms + row.append(Json::Value{Json::Int(aChunkDiff)}); // 2: chunkDiff + updates.append(std::move(row)); +} + +void ProfileBufferGlobalController::LogUpdate( + base::ProcessId aProcessId, + const ProfileBufferControlledChunkManager::Update& aUpdate) { + Log([&](Json::Value& aRoot) { + Json::Value& updates = aRoot[Json::StaticString{"updates"}]; + if (!updates.isArray()) { + aRoot[Json::StaticString{"updatesSchema"}] = + Json::StaticString{"0: pid, 1: chunkRelease_TSms, 2: chunkDiff"}; // Must match the rows built by LogUpdateChunks. + updates = Json::Value{Json::arrayValue}; + } + if (aUpdate.IsFinal()) { + LogUpdateChunks(updates, aProcessId, TimeStamp{}, 0); // Final update: null timestamp, diff 0. + } else if (!aUpdate.IsNotUpdate()) { + for (const auto& chunk : aUpdate.NewlyReleasedChunksRef()) { + LogUpdateChunks(updates, aProcessId, chunk.mDoneTimeStamp, 1); // One row per newly released chunk. + } + } + }); +} + +void ProfileBufferGlobalController::LogDeletion(base::ProcessId aProcessId, + const TimeStamp& aTimeStamp) { + Log([&](Json::Value& aRoot) { + Json::Value& updates = aRoot[Json::StaticString{"updates"}]; + if (!updates.isArray()) { + updates = Json::Value{Json::arrayValue}; + } + LogUpdateChunks(updates, aProcessId, aTimeStamp, -1); // One row per destroyed chunk. + }); +} + +ProfileBufferGlobalController::ProfileBufferGlobalController( + size_t
aMaximumBytes) + : mMaximumBytes(aMaximumBytes) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + Log([](Json::Value& aRoot) { + aRoot[Json::StaticString{"controllerCreationTime" TIMESTAMP_JSON_SUFFIX}] = + ProfilingLog::Timestamp(); + }); + + // This is the local chunk manager for this parent process, so updates can be + // handled here. + ProfileBufferControlledChunkManager* parentChunkManager = + profiler_get_controlled_chunk_manager(); + + if (NS_WARN_IF(!parentChunkManager)) { + Log([](Json::Value& aRoot) { + aRoot[Json::StaticString{"controllerCreationFailureReason"}] = + "No parent chunk manager"; + }); + return; // No update callback is installed in this case. + } + + { + auto lockedParentChunkManagerAndPendingUpdate = + sParentChunkManagerAndPendingUpdate.Lock(); + lockedParentChunkManagerAndPendingUpdate->mChunkManager = + parentChunkManager; // Published under the mutex before the callback is installed. + } + + parentChunkManager->SetUpdateCallback( + [this](ProfileBufferControlledChunkManager::Update&& aUpdate) { // Captures this; the destructor resets the callback if the chunk manager is still set. + MOZ_ASSERT(!aUpdate.IsNotUpdate(), + "Update callback should never be given a non-update"); + auto lockedParentChunkManagerAndPendingUpdate = + sParentChunkManagerAndPendingUpdate.Lock(); + if (aUpdate.IsFinal()) { + // Final update of the parent. + // We cannot keep the chunk manager, and there's no point handling + // updates anymore. Do some cleanup now, to free resources before + // we're destroyed. + lockedParentChunkManagerAndPendingUpdate->mChunkManager = nullptr; + lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Clear(); + mUnreleasedTotalBytes = 0; + mUnreleasedBytesByPid.Clear(); + mReleasedTotalBytes = 0; + mReleasedChunksByTime.Clear(); + return; + } + if (!lockedParentChunkManagerAndPendingUpdate->mChunkManager) { + // No chunk manager, ignore updates. + return; + } + // Special handling of parent non-final updates: + // These updates are coming from *this* process, and may originate from + // scopes in any thread where any lock is held, so using other locks (to + // e.g., dispatch tasks or send IPCs) could trigger a deadlock.
Instead, + // parent updates are stored locally and handled when the next + // non-parent update needs handling, see HandleChildChunkManagerUpdate. + lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Fold( + std::move(aUpdate)); + }); +} + +ProfileBufferGlobalController ::~ProfileBufferGlobalController() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + // Extract the parent chunk manager (if still set). + // This means any update after this will be ignored. + ProfileBufferControlledChunkManager* parentChunkManager = []() { + auto lockedParentChunkManagerAndPendingUpdate = + sParentChunkManagerAndPendingUpdate.Lock(); + lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Clear(); + return std::exchange( + lockedParentChunkManagerAndPendingUpdate->mChunkManager, nullptr); + }(); // The lock taken inside the lambda is released here, before SetUpdateCallback below re-enters the callback. + if (parentChunkManager) { + // We had not received a final update yet, so the chunk manager is still + // valid. Reset the callback in the chunk manager, this will immediately + // invoke the callback with the final empty update; see handling above. + parentChunkManager->SetUpdateCallback({}); + } +} + +void ProfileBufferGlobalController::HandleChildChunkManagerUpdate( + base::ProcessId aProcessId, + ProfileBufferControlledChunkManager::Update&& aUpdate) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + MOZ_ASSERT(aProcessId != mParentProcessId); + + MOZ_ASSERT(!aUpdate.IsNotUpdate(), + "HandleChildChunkManagerUpdate should not be given a non-update"); + + auto lockedParentChunkManagerAndPendingUpdate = + sParentChunkManagerAndPendingUpdate.Lock(); // Held for the rest of this function. + if (!lockedParentChunkManagerAndPendingUpdate->mChunkManager) { + // No chunk manager, ignore updates. + return; + } + + if (aUpdate.IsFinal()) { + // Final update in a child process, remove all traces of that process. + LogUpdate(aProcessId, aUpdate); + size_t index = mUnreleasedBytesByPid.BinaryIndexOf(aProcessId); + if (index != PidAndBytesArray::NoIndex) { + // We already have a value for this pid.
+ PidAndBytes& pidAndBytes = mUnreleasedBytesByPid[index]; + mUnreleasedTotalBytes -= pidAndBytes.mBytes; + mUnreleasedBytesByPid.RemoveElementAt(index); + } + + size_t released = 0; // Bytes of released chunks that belonged to aProcessId. + mReleasedChunksByTime.RemoveElementsBy( + [&released, aProcessId](const auto& chunk) { + const bool match = chunk.mProcessId == aProcessId; + if (match) { + released += chunk.mBytes; + } + return match; + }); + if (released != 0) { + mReleasedTotalBytes -= released; + } + + // Total can only have gone down, so there's no need to check the limit. + return; + } + + // Non-final update in child process. + + // Before handling the child update, we may have pending updates from the + // parent, which can be processed now since we're in an IPC callback outside + // of any profiler-related scope. + if (!lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.IsNotUpdate()) { + MOZ_ASSERT( + !lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.IsFinal()); + HandleChunkManagerNonFinalUpdate( + mParentProcessId, + std::move(lockedParentChunkManagerAndPendingUpdate->mPendingUpdate), + *lockedParentChunkManagerAndPendingUpdate->mChunkManager); + lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Clear(); // Reset the moved-from pending update. + } + + HandleChunkManagerNonFinalUpdate( + aProcessId, std::move(aUpdate), + *lockedParentChunkManagerAndPendingUpdate->mChunkManager); +} + +/* static */ +bool ProfileBufferGlobalController::IsLockedOnCurrentThread() { + return sParentChunkManagerAndPendingUpdate.Mutex().IsLockedOnCurrentThread(); +} + +void ProfileBufferGlobalController::HandleChunkManagerNonFinalUpdate( + base::ProcessId aProcessId, + ProfileBufferControlledChunkManager::Update&& aUpdate, + ProfileBufferControlledChunkManager& aParentChunkManager) { + MOZ_ASSERT(!aUpdate.IsFinal()); + LogUpdate(aProcessId, aUpdate); + + size_t index = mUnreleasedBytesByPid.BinaryIndexOf(aProcessId); + if (index != PidAndBytesArray::NoIndex) { + // We already have a value for this pid.
+ PidAndBytes& pidAndBytes = mUnreleasedBytesByPid[index]; + mUnreleasedTotalBytes = + mUnreleasedTotalBytes - pidAndBytes.mBytes + aUpdate.UnreleasedBytes(); + pidAndBytes.mBytes = aUpdate.UnreleasedBytes(); + } else { + // New pid. + mUnreleasedBytesByPid.InsertElementSorted( + PidAndBytes{aProcessId, aUpdate.UnreleasedBytes()}); + mUnreleasedTotalBytes += aUpdate.UnreleasedBytes(); + } + + size_t destroyedReleased = 0; // Bytes of released chunks that the sender has already destroyed. + if (!aUpdate.OldestDoneTimeStamp().IsNull()) { + size_t i = 0; + for (; i < mReleasedChunksByTime.Length(); ++i) { + if (mReleasedChunksByTime[i].mTimeStamp >= + aUpdate.OldestDoneTimeStamp()) { + break; + } + } + // Here, i is the index of the first item that's at or after + // aUpdate.mOldestDoneTimeStamp, so chunks from aProcessId before that have + // been destroyed. + while (i != 0) { + --i; + const TimeStampAndBytesAndPid& item = mReleasedChunksByTime[i]; + if (item.mProcessId == aProcessId) { + destroyedReleased += item.mBytes; + mReleasedChunksByTime.RemoveElementAt(i); // Walking backwards, so the indices still to be visited do not shift. + } + } + } + + size_t newlyReleased = 0; + for (const ProfileBufferControlledChunkManager::ChunkMetadata& chunk : + aUpdate.NewlyReleasedChunksRef()) { + newlyReleased += chunk.mBufferBytes; + mReleasedChunksByTime.InsertElementSorted(TimeStampAndBytesAndPid{ + chunk.mDoneTimeStamp, chunk.mBufferBytes, aProcessId}); + } + + mReleasedTotalBytes = mReleasedTotalBytes - destroyedReleased + newlyReleased; + +# ifdef DEBUG + size_t totalReleased = 0; + for (const TimeStampAndBytesAndPid& item : mReleasedChunksByTime) { + totalReleased += item.mBytes; + } + MOZ_ASSERT(mReleasedTotalBytes == totalReleased); +# endif // DEBUG + + // Enforce the global limit: destroy released chunks, oldest first. + while (mUnreleasedTotalBytes + mReleasedTotalBytes > mMaximumBytes && + !mReleasedChunksByTime.IsEmpty()) { + // We have reached the global memory limit, and there *are* released chunks + // that can be destroyed. Start with the first one, which is the oldest.
+ const TimeStampAndBytesAndPid& oldest = mReleasedChunksByTime[0]; + LogDeletion(oldest.mProcessId, oldest.mTimeStamp); + mReleasedTotalBytes -= oldest.mBytes; + if (oldest.mProcessId == mParentProcessId) { + aParentChunkManager.DestroyChunksAtOrBefore(oldest.mTimeStamp); + } else { + ProfilerParentTracker::ForChild( + oldest.mProcessId, + [timestamp = oldest.mTimeStamp](ProfilerParent* profilerParent) { + Unused << profilerParent->SendDestroyReleasedChunksAtOrBefore( + timestamp); + }); + } + mReleasedChunksByTime.RemoveElementAt(0); // Invalidates the oldest reference; nothing reads it after this point. + } +} + +/* static */ +ProfilerParentTracker* ProfilerParentTracker::GetInstance() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + // The main instance pointer, it will be initialized at most once, before + // XPCOMShutdownThreads. + static UniquePtr instance = nullptr; + if (MOZ_UNLIKELY(!instance)) { + if (PastShutdownPhase(ShutdownPhase::XPCOMShutdownThreads)) { + return nullptr; // Never create the tracker this late in shutdown; callers must handle null. + } + + instance = MakeUnique(); + + // The tracker should get destroyed before threads are shutdown, because its + // destruction closes extant channels, which could trigger promise + // rejections that need to be dispatched to other threads. + ClearOnShutdown(&instance, ShutdownPhase::XPCOMShutdownThreads); + } + + return instance.get(); +} + +/* static */ +void ProfilerParentTracker::StartTracking(ProfilerParent* aProfilerParent) { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker) { + return; + } + + if (tracker->mMaybeController.isNothing() && tracker->mEntries != 0) { + // There is no controller yet, but the profiler has started. + // Since we're adding a ProfilerParent, it's a good time to start + // controlling the global memory usage of the profiler. + // (And this helps delay the Controller startup, because the parent profiler + // can start *very* early in the process, when some resources like threads + // are not ready yet.)
+ tracker->mMaybeController.emplace(size_t(tracker->mEntries) * 8u); // Byte limit for the controller: entries times 8 (presumably 8 bytes per entry, TODO confirm). + } + + tracker->mProfilerParents.AppendElement(aProfilerParent); +} + +/* static */ +void ProfilerParentTracker::StopTracking(ProfilerParent* aParent) { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker) { + return; + } + + tracker->mProfilerParents.RemoveElement(aParent); +} + +/* static */ +void ProfilerParentTracker::ProfilerStarted(uint32_t aEntries) { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker) { + return; + } + + tracker->mEntries = aEntries; + + if (tracker->mMaybeController.isNothing() && + !tracker->mProfilerParents.IsEmpty()) { + // We are already tracking child processes, so it's a good time to start + // controlling the global memory usage of the profiler. + tracker->mMaybeController.emplace(size_t(tracker->mEntries) * 8u); + } +} + +/* static */ +void ProfilerParentTracker::ProfilerWillStopIfStarted() { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker) { + return; + } + + tracker->mEntries = 0; + tracker->mMaybeController = Nothing{}; // Destroys the ProfileBufferGlobalController, if there is one. +} + +/* static */ +size_t ProfilerParentTracker::ProfilerParentCount() { + size_t count = 0; + ProfilerParentTracker* tracker = GetInstance(); + if (tracker) { + for (ProfilerParent* profilerParent : tracker->mProfilerParents) { + if (!profilerParent->mDestroyed) { + ++count; + } + } + } + return count; +} + +template +/* static */ +void ProfilerParentTracker::Enumerate(FuncType&& aIterFunc) { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker) { + return; + } + + for (ProfilerParent* profilerParent : tracker->mProfilerParents) { + if (!profilerParent->mDestroyed) { + aIterFunc(profilerParent); // Not forwarded: aIterFunc may be invoked several times in this loop. + } + } +} + +template +/* static */ +void ProfilerParentTracker::ForChild(base::ProcessId aChildPid, + FuncType&& aIterFunc) { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker) { + return; + } + + for (ProfilerParent* profilerParent : tracker->mProfilerParents) { + if
(profilerParent->mChildPid == aChildPid) { + if (!profilerParent->mDestroyed) { + std::forward(aIterFunc)(profilerParent); + } + return; // Stop at the first parent with a matching pid, destroyed or not. + } + } +} + +/* static */ +void ProfilerParentTracker::ForwardChildChunkManagerUpdate( + base::ProcessId aProcessId, + ProfileBufferControlledChunkManager::Update&& aUpdate) { + ProfilerParentTracker* tracker = GetInstance(); + if (!tracker || tracker->mMaybeController.isNothing()) { + return; // No tracker or no controller: the update is dropped. + } + + MOZ_ASSERT(!aUpdate.IsNotUpdate(), + "No process should ever send a non-update"); + tracker->mMaybeController->HandleChildChunkManagerUpdate(aProcessId, + std::move(aUpdate)); +} + +ProfilerParentTracker::ProfilerParentTracker() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + MOZ_COUNT_CTOR(ProfilerParentTracker); +} + +ProfilerParentTracker::~ProfilerParentTracker() { + // This destructor should only be called on the main thread. + MOZ_RELEASE_ASSERT(NS_IsMainThread() || + // OR we're not on the main thread (including if we are + // past the end of `main()`), which is fine *if* there are + // no ProfilerParent's still registered, in which case + // nothing else will happen in this destructor anyway. + // See bug 1713971 for more information. + mProfilerParents.IsEmpty()); + MOZ_COUNT_DTOR(ProfilerParentTracker); + + // Close the channels of any profiler parents that haven't been destroyed. + for (ProfilerParent* profilerParent : mProfilerParents.Clone()) { // Iterates over a copy; presumably because Close() can end up modifying mProfilerParents, TODO confirm. + if (!profilerParent->mDestroyed) { + // Keep the object alive until the call to Close() has completed. + // Close() will trigger a call to DeallocPProfilerParent.
+ RefPtr actor = profilerParent; + actor->Close(); + } + } +} + +ProfilerParent::ProfilerParent(base::ProcessId aChildPid) + : mChildPid(aChildPid), mDestroyed(false) { + MOZ_COUNT_CTOR(ProfilerParent); + + MOZ_RELEASE_ASSERT(NS_IsMainThread()); +} + +void ProfilerParent::Init() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + ProfilerParentTracker::StartTracking(this); // May create the global controller if the profiler is already running. + + // We propagated the profiler state from the parent process to the child + // process through MOZ_PROFILER_STARTUP* environment variables. + // However, the profiler state might have changed in this process since then, + // and now that an active communication channel has been established with the + // child process, it's a good time to sync up the two profilers again. + + int entries = 0; + Maybe duration = Nothing(); + double interval = 0; + mozilla::Vector filters; + uint32_t features = 0; // Initialized like the other out-parameters, so no indeterminate value can be forwarded. + uint64_t activeTabID = 0; // Same as features. + profiler_get_start_params(&entries, &duration, &interval, &features, &filters, + &activeTabID); + + if (entries != 0) { + ProfilerInitParams ipcParams; + ipcParams.enabled() = true; + ipcParams.entries() = entries; + ipcParams.duration() = duration; + ipcParams.interval() = interval; + ipcParams.features() = features; + ipcParams.activeTabID() = activeTabID; + + // If the filters exclude our pid, make sure it's stopped, otherwise + // continue with starting it.
+ if (!profiler::detail::FiltersExcludePid( + filters, ProfilerProcessId::FromNumber(mChildPid))) { + ipcParams.filters().SetCapacity(filters.length()); + for (const char* filter : filters) { + ipcParams.filters().AppendElement(filter); + } + + Unused << SendEnsureStarted(ipcParams); + RequestChunkManagerUpdate(); + return; + } + } + + Unused << SendStop(); // Reached when entries is 0 or when the filters exclude this child. +} +#endif // MOZ_GECKO_PROFILER + +ProfilerParent::~ProfilerParent() { + MOZ_COUNT_DTOR(ProfilerParent); + + MOZ_RELEASE_ASSERT(NS_IsMainThread()); +#ifdef MOZ_GECKO_PROFILER + ProfilerParentTracker::StopTracking(this); +#endif +} + +#ifdef MOZ_GECKO_PROFILER +/* static */ +nsTArray +ProfilerParent::GatherProfiles() { + nsTArray results; + if (!NS_IsMainThread()) { + return results; // Off the main thread: empty result, nothing is sent. + } + + results.SetCapacity(ProfilerParentTracker::ProfilerParentCount()); + ProfilerParentTracker::Enumerate([&](ProfilerParent* profilerParent) { + results.AppendElement(SingleProcessProfilePromiseAndChildPid{ + profilerParent->SendGatherProfile(), profilerParent->mChildPid}); + }); + return results; +} + +/* static */ +RefPtr +ProfilerParent::RequestGatherProfileProgress(base::ProcessId aChildPid) { + RefPtr promise; + ProfilerParentTracker::ForChild( + aChildPid, [&promise](ProfilerParent* profilerParent) { + promise = profilerParent->SendGetGatherProfileProgress(); + }); + return promise; // Null if ForChild did not call the lambda (no live parent for aChildPid). +} + +// Magic value for ProfileBufferChunkManagerUpdate::unreleasedBytes meaning +// that this is a final update from a child.
+constexpr static uint64_t scUpdateUnreleasedBytesFINAL = uint64_t(-1); + +/* static */ +ProfileBufferChunkManagerUpdate ProfilerParent::MakeFinalUpdate() { + return ProfileBufferChunkManagerUpdate{ + uint64_t(scUpdateUnreleasedBytesFINAL), 0, TimeStamp{}, // unreleasedBytes (final marker), releasedBytes, oldestDoneTimeStamp + nsTArray{}}; // newlyReleasedChunks (empty) +} + +/* static */ +bool ProfilerParent::IsLockedOnCurrentThread() { + return ProfileBufferGlobalController::IsLockedOnCurrentThread(); +} + +void ProfilerParent::RequestChunkManagerUpdate() { + if (mDestroyed) { + return; // No request once the actor is destroyed; this also ends the chain of re-requests below. + } + + RefPtr updatePromise = + SendAwaitNextChunkManagerUpdate(); + updatePromise->Then( + GetMainThreadSerialEventTarget(), __func__, + [self = RefPtr(this)]( + const ProfileBufferChunkManagerUpdate& aUpdate) { + if (aUpdate.unreleasedBytes() == scUpdateUnreleasedBytesFINAL) { + // Special value meaning it's the final update from that child. + ProfilerParentTracker::ForwardChildChunkManagerUpdate( + self->mChildPid, + ProfileBufferControlledChunkManager::Update(nullptr)); + } else { + // Not the final update, translate it. + std::vector + chunks; + if (!aUpdate.newlyReleasedChunks().IsEmpty()) { + chunks.reserve(aUpdate.newlyReleasedChunks().Length()); + for (const ProfileBufferChunkMetadata& chunk : + aUpdate.newlyReleasedChunks()) { + chunks.emplace_back(chunk.doneTimeStamp(), chunk.bufferBytes()); + } + } + // Let the tracker handle it. + ProfilerParentTracker::ForwardChildChunkManagerUpdate( + self->mChildPid, + ProfileBufferControlledChunkManager::Update( + aUpdate.unreleasedBytes(), aUpdate.releasedBytes(), + aUpdate.oldestDoneTimeStamp(), std::move(chunks))); + // This was not a final update, so start a new request. + self->RequestChunkManagerUpdate(); + } + }, + [self = RefPtr(this)]( + mozilla::ipc::ResponseRejectReason aReason) { + // Rejection could be for a number of reasons, assume the child will + // not respond anymore, so we pretend we received a final update.
+ ProfilerParentTracker::ForwardChildChunkManagerUpdate( + self->mChildPid, + ProfileBufferControlledChunkManager::Update(nullptr)); + }); +} + +// Ref-counted class that resolves a promise on destruction. +// Usage: +// RefPtr f() { +// return PromiseResolverOnDestruction::RunTask( +// [](RefPtr aPromiseResolver){ +// // Give *copies* of aPromiseResolver to asynchronous sub-tasks, the +// // last remaining RefPtr destruction will resolve the promise. +// }); +// } +class PromiseResolverOnDestruction { + public: + NS_INLINE_DECL_REFCOUNTING(PromiseResolverOnDestruction) + + template + static RefPtr RunTask(TaskFunction&& aTaskFunction) { + RefPtr promiseResolver = + new PromiseResolverOnDestruction(); + RefPtr promise = + promiseResolver->mPromiseHolder.Ensure(__func__); + std::forward(aTaskFunction)(std::move(promiseResolver)); // Ownership of the only reference created here moves into the task. + return promise; // Already resolved if the task kept no copy of the resolver. + } + + private: + PromiseResolverOnDestruction() = default; + + ~PromiseResolverOnDestruction() { + mPromiseHolder.ResolveIfExists(/* unused */ true, __func__); + } + + MozPromiseHolder mPromiseHolder; +}; + +// Given a ProfilerParentSendFunction: (ProfilerParent*) -> some MozPromise, +// run the function on all live ProfilerParents and return a GenericPromise, and +// when all their promises are resolved or rejected, resolve our GenericPromise. +template +static RefPtr SendAndConvertPromise( + ProfilerParentSendFunction&& aProfilerParentSendFunction) { + if (!NS_IsMainThread()) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); // Off the main thread nothing is sent. + } + + return PromiseResolverOnDestruction::RunTask( + [&](RefPtr aPromiseResolver) { + ProfilerParentTracker::Enumerate([&](ProfilerParent* profilerParent) { + std::forward(aProfilerParentSendFunction)( + profilerParent) + ->Then(GetMainThreadSerialEventTarget(), __func__, + [aPromiseResolver]( + typename std::remove_reference_t< + decltype(*std::forward( + aProfilerParentSendFunction)( + profilerParent))>::ResolveOrRejectValue&&) { + // Whatever the resolution/rejection is, do nothing.
+ // The lambda aPromiseResolver ref-count will decrease. + }); + }); + }); +} + +/* static */ +RefPtr ProfilerParent::ProfilerStarted( + nsIProfilerStartParams* aParams) { + if (!NS_IsMainThread()) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + ProfilerInitParams ipcParams; + double duration = 0.0; // Stays 0 (mapped to Nothing() below) if GetDuration does not write it; its result is not checked. + ipcParams.enabled() = true; + aParams->GetEntries(&ipcParams.entries()); + aParams->GetDuration(&duration); + if (duration > 0.0) { + ipcParams.duration() = Some(duration); + } else { + ipcParams.duration() = Nothing(); + } + aParams->GetInterval(&ipcParams.interval()); + aParams->GetFeatures(&ipcParams.features()); + ipcParams.filters() = aParams->GetFilters().Clone(); + // We need filters as a Span to test pids in the lambda below. + auto filtersCStrings = nsTArray{aParams->GetFilters().Length()}; + for (const auto& filter : aParams->GetFilters()) { + filtersCStrings.AppendElement(filter.Data()); // Borrowed pointers into the filter strings of aParams; only used synchronously below. + } + aParams->GetActiveTabID(&ipcParams.activeTabID()); + + ProfilerParentTracker::ProfilerStarted(ipcParams.entries()); + + return SendAndConvertPromise([&](ProfilerParent* profilerParent) { + if (profiler::detail::FiltersExcludePid( + filtersCStrings, + ProfilerProcessId::FromNumber(profilerParent->mChildPid))) { + // This pid is excluded, don't start the profiler at all.
+ return PProfilerParent::StartPromise::CreateAndResolve(/* unused */ true, + __func__); + } + auto promise = profilerParent->SendStart(ipcParams); + profilerParent->RequestChunkManagerUpdate(); + return promise; + }); +} + +/* static */ +void ProfilerParent::ProfilerWillStopIfStarted() { + if (!NS_IsMainThread()) { + return; + } + + ProfilerParentTracker::ProfilerWillStopIfStarted(); +} + +/* static */ +RefPtr ProfilerParent::ProfilerStopped() { + return SendAndConvertPromise([](ProfilerParent* profilerParent) { + return profilerParent->SendStop(); // One Stop message per live child; the returned promise resolves once all of them have settled. + }); +} + +/* static */ +RefPtr ProfilerParent::ProfilerPaused() { + return SendAndConvertPromise([](ProfilerParent* profilerParent) { + return profilerParent->SendPause(); + }); +} + +/* static */ +RefPtr ProfilerParent::ProfilerResumed() { + return SendAndConvertPromise([](ProfilerParent* profilerParent) { + return profilerParent->SendResume(); + }); +} + +/* static */ +RefPtr ProfilerParent::ProfilerPausedSampling() { + return SendAndConvertPromise([](ProfilerParent* profilerParent) { + return profilerParent->SendPauseSampling(); + }); +} + +/* static */ +RefPtr ProfilerParent::ProfilerResumedSampling() { + return SendAndConvertPromise([](ProfilerParent* profilerParent) { + return profilerParent->SendResumeSampling(); + }); +} + +/* static */ +void ProfilerParent::ClearAllPages() { + if (!NS_IsMainThread()) { + return; + } + + ProfilerParentTracker::Enumerate([](ProfilerParent* profilerParent) { + Unused << profilerParent->SendClearAllPages(); + }); +} + +/* static */ +RefPtr ProfilerParent::WaitOnePeriodicSampling() { + return SendAndConvertPromise([](ProfilerParent* profilerParent) { + return profilerParent->SendWaitOnePeriodicSampling(); + }); +} + +void ProfilerParent::ActorDestroy(ActorDestroyReason aActorDestroyReason) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + mDestroyed = true; // Makes the tracker skip this actor in Enumerate, ForChild and ProfilerParentCount. +} + +#endif // MOZ_GECKO_PROFILER + +} // namespace mozilla diff --git a/tools/profiler/gecko/ProfilerTypes.ipdlh
b/tools/profiler/gecko/ProfilerTypes.ipdlh new file mode 100644 index 0000000000..6255d47db0 --- /dev/null +++ b/tools/profiler/gecko/ProfilerTypes.ipdlh @@ -0,0 +1,43 @@ +/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 8 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +using class mozilla::TimeStamp from "mozilla/TimeStamp.h"; +using struct mozilla::ProfileGenerationAdditionalInformation from "ProfileAdditionalInformation.h"; + +namespace mozilla { + +struct ProfilerInitParams { + bool enabled; + uint32_t entries; + double? duration; // Nothing when the requested duration is not positive (see ProfilerParent::ProfilerStarted). + double interval; + uint32_t features; + uint64_t activeTabID; + nsCString[] filters; +}; + +struct ProfileBufferChunkMetadata { + TimeStamp doneTimeStamp; + uint32_t bufferBytes; +}; + +struct ProfileBufferChunkManagerUpdate { + uint64_t unreleasedBytes; // uint64_t(-1) marks the final update from a child (scUpdateUnreleasedBytesFINAL in ProfilerParent.cpp). + uint64_t releasedBytes; + TimeStamp oldestDoneTimeStamp; + ProfileBufferChunkMetadata[] newlyReleasedChunks; +}; + +struct GatherProfileProgress { + uint32_t progressProportionValueUnderlyingType; + nsCString progressLocation; +}; + +struct IPCProfileAndAdditionalInformation { + Shmem profileShmem; + ProfileGenerationAdditionalInformation? additionalInformation; +}; + +} // namespace mozilla diff --git a/tools/profiler/gecko/components.conf b/tools/profiler/gecko/components.conf new file mode 100644 index 0000000000..b1775c37ab --- /dev/null +++ b/tools/profiler/gecko/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +Classes = [ + { + 'js_name': 'profiler', + 'cid': '{25db9b8e-8123-4de1-b66d-8bbbedf2cdf4}', + 'contract_ids': ['@mozilla.org/tools/profiler;1'], + 'interfaces': ['nsIProfiler'], + 'type': 'nsProfiler', + 'headers': ['/tools/profiler/gecko/nsProfiler.h'], + 'init_method': 'Init', + }, +] diff --git a/tools/profiler/gecko/nsIProfiler.idl b/tools/profiler/gecko/nsIProfiler.idl new file mode 100644 index 0000000000..8b501d4b9f --- /dev/null +++ b/tools/profiler/gecko/nsIProfiler.idl @@ -0,0 +1,208 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +%{C++ +#include "mozilla/Maybe.h" +#include "nsTArrayForwardDeclare.h" +#include "nsStringFwd.h" +#include "mozilla/MozPromise.h" +%} + +[ref] native nsCString(const nsCString); +[ref] native StringArrayRef(const nsTArray); +native ProfileDataBufferMozPromise(RefPtr, nsresult, true>>); + +/** + * Start-up parameters for subprocesses are passed through nsIObserverService, + * which, unfortunately, means we need to implement nsISupports in order to + * go through it. + */ +[scriptable, builtinclass, uuid(0a175ba7-8fcf-4ce9-9c4b-ccc6272f4425)] +interface nsIProfilerStartParams : nsISupports +{ + readonly attribute uint32_t entries; + readonly attribute double duration; + readonly attribute double interval; + readonly attribute uint32_t features; + readonly attribute uint64_t activeTabID; + + [noscript, notxpcom, nostdcall] StringArrayRef getFilters(); +}; + +[scriptable, builtinclass, uuid(ead3f75c-0e0e-4fbb-901c-1e5392ef5b2a)] +interface nsIProfiler : nsISupports +{ + /* + * Control functions return as soon as this process' profiler has done its + * work. 
The returned promise gets resolved when sub-processes have completed + * their operation, or immediately if there are no sub-processes. + */ + [implicit_jscontext] + Promise StartProfiler(in uint32_t aEntries, in double aInterval, + in Array aFeatures, + [optional] in Array aFilters, + [optional] in uint64_t aActiveTabID, + [optional] in double aDuration); + [implicit_jscontext] + Promise StopProfiler(); + boolean IsPaused(); + [implicit_jscontext] + Promise Pause(); + [implicit_jscontext] + Promise Resume(); + boolean IsSamplingPaused(); + [implicit_jscontext] + Promise PauseSampling(); + [implicit_jscontext] + Promise ResumeSampling(); + + /* + * Resolves the returned promise after at least one full periodic sampling in + * each process. + * Rejects the promise if sampler is not running (yet, or anymore, or paused) + * in the parent process. + * This is mainly useful in tests, to wait just long enough to guarantee that + * at least one sample was taken in each process. + */ + [implicit_jscontext] + Promise waitOnePeriodicSampling(); + + /* + * Returns the JSON string of the profile. If aSinceTime is passed, only + * report samples taken at >= aSinceTime. + */ + string GetProfile([optional] in double aSinceTime); + + /* + * Returns a JS object of the profile. If aSinceTime is passed, only report + * samples taken at >= aSinceTime. + */ + [implicit_jscontext] + jsval getProfileData([optional] in double aSinceTime); + + [implicit_jscontext] + Promise getProfileDataAsync([optional] in double aSinceTime); + + [implicit_jscontext] + Promise getProfileDataAsArrayBuffer([optional] in double aSinceTime); + + [implicit_jscontext] + Promise getProfileDataAsGzippedArrayBuffer([optional] in double aSinceTime); + + /** + * Asynchronously dump the profile collected so far to a file. + * Returns a promise that resolves once the file has been written, with data + * from all responsive Firefox processes. 
Note: This blocks the parent process + * while collecting its own data, then unblocks while child processes' data is + * being collected. + * `aFilename` may be a full path, or a path relative to where Firefox was + * launched. The target directory must already exist. + */ + [implicit_jscontext] + Promise dumpProfileToFileAsync(in ACString aFilename, + [optional] in double aSinceTime); + + /** + * Synchronously dump the profile collected so far in this process to a file. + * This profile will only contain data from the parent process, and from child + * processes that have ended during the session; other currently-live + * processes are ignored. + * `aFilename` may be a full path, or a path relative to where Firefox was + * launched. The target directory must already exist. + */ + void dumpProfileToFile(in string aFilename); + + boolean IsActive(); + + /** + * Clear all registered and unregistered page information in the profiler. + */ + void ClearAllPages(); + + /** + * Returns an array of the features that are supported in this build. + * Features may vary depending on platform and build flags. + */ + Array GetFeatures(); + + /** + * Returns a JavaScript object that contains a description of the currently configured + * state of the profiler when the profiler is active. This can be useful to assert + * the UI of the profiler's recording panel in tests. It returns null when the profiler + * is not active. + */ + [implicit_jscontext] + readonly attribute jsval activeConfiguration; + + /** + * Returns an array of all features that are supported by the profiler. + * The array may contain features that are not supported in this build. + */ + Array GetAllFeatures(); + + void GetBufferInfo(out uint32_t aCurrentPosition, out uint32_t aTotalSize, + out uint32_t aGeneration); + + /** + * Returns the elapsed time, in milliseconds, since the profiler's epoch. + * The epoch is guaranteed to be constant for the duration of the + * process, but is otherwise arbitrary.
+ */ + double getElapsedTime(); + + /** + * Contains an array of shared library objects. + * Every object has the properties: + * - start: The start address of the memory region occupied by this library. + * - end: The end address of the memory region occupied by this library. + * - offset: Usually zero, except on Linux / Android if the first mapped + * section of the library has been mapped to an address that's + * different from the library's base address. + * Then offset = start - baseAddress. + * - name: The name (file basename) of the binary. + * - path: The full absolute path to the binary. + * - debugName: On Windows, the name of the pdb file for the binary. On other + * platforms, the same as |name|. + * - debugPath: On Windows, the full absolute path of the pdb file for the + * binary. On other platforms, the same as |path|. + * - arch: On Mac, the name of the architecture that identifies the right + * binary image of a fat binary. Example values are "i386", "x86_64", + * and "x86_64h". (x86_64h is used for binaries that contain + * instructions that are specific to the Intel Haswell microarchitecture.) + * On non-Mac platforms, arch is "". + * - breakpadId: A unique identifier string for this library, as used by breakpad. + */ + [implicit_jscontext] + readonly attribute jsval sharedLibraries; + + /** + * Returns a promise that resolves to a SymbolTableAsTuple for the binary at + * the given path. + * + * SymbolTable as tuple: [addrs, index, buffer] + * Contains a symbol table, which can be used to map addresses to strings. + * + * The first element of this tuple, commonly named "addrs", is a sorted array of + * symbol addresses, as library-relative offsets in bytes, in ascending order. + * The third element of this tuple, commonly named "buffer", is a buffer of + * bytes that contains all strings from this symbol table, in the order of the + * addresses they correspond to, in utf-8 encoded form, all concatenated + * together. 
+ * The second element of this tuple, commonly named "index", contains positions + * into "buffer". For every address, that position is where the string for that + * address starts in the buffer. + * index.length == addrs.length + 1. + * index[addrs.length] is the end position of the last string in the buffer. + * + * The string for the address addrs[i] is + * (new TextDecoder()).decode(buffer.subarray(index[i], index[i + 1])) + */ + [implicit_jscontext] + Promise getSymbolTable(in ACString aDebugPath, in ACString aBreakpadID); + + [notxpcom, nostdcall] ProfileDataBufferMozPromise getProfileDataAsGzippedArrayBufferAndroid(in double aSinceTime); +}; diff --git a/tools/profiler/gecko/nsProfiler.cpp b/tools/profiler/gecko/nsProfiler.cpp new file mode 100644 index 0000000000..66e32ce2bc --- /dev/null +++ b/tools/profiler/gecko/nsProfiler.cpp @@ -0,0 +1,1487 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsProfiler.h" + +#include +#include +#include +#include +#include + +#include "GeckoProfiler.h" +#include "ProfilerControl.h" +#include "ProfilerParent.h" +#include "js/Array.h" // JS::NewArrayObject +#include "js/JSON.h" +#include "js/PropertyAndElement.h" // JS_SetElement +#include "js/Value.h" +#include "json/json.h" +#include "mozilla/ErrorResult.h" +#include "mozilla/JSONStringWriteFuncs.h" +#include "mozilla/SchedulerGroup.h" +#include "mozilla/Services.h" +#include "mozilla/dom/Promise.h" +#include "mozilla/dom/TypedArray.h" +#include "mozilla/Preferences.h" +#include "nsComponentManagerUtils.h" +#include "nsIInterfaceRequestor.h" +#include "nsIInterfaceRequestorUtils.h" +#include "nsILoadContext.h" +#include "nsIWebNavigation.h" +#include "nsProfilerStartParams.h" +#include "nsProxyRelease.h" +#include "nsString.h" +#include "nsThreadUtils.h" +#include "platform.h" +#include "shared-libraries.h" +#include "zlib.h" + +#ifndef ANDROID +# include +#else +# include +#endif + +using namespace mozilla; + +using dom::AutoJSAPI; +using dom::Promise; +using std::string; + +static constexpr size_t scLengthMax = size_t(JS::MaxStringLength); +// Used when trying to add more JSON data, to account for the extra space needed +// for the log and to close the profile. 
+static constexpr size_t scLengthAccumulationThreshold = scLengthMax - 16 * 1024;
+
+NS_IMPL_ISUPPORTS(nsProfiler, nsIProfiler)
+
+// Starts with no profile gathering in progress.
+nsProfiler::nsProfiler() : mGathering(false) {}
+
+// Shuts down the symbol-table thread if one was created, then resets any
+// gathering state with NS_ERROR_ILLEGAL_DURING_SHUTDOWN.
+nsProfiler::~nsProfiler() {
+  if (mSymbolTableThread) {
+    mSymbolTableThread->Shutdown();
+  }
+  ResetGathering(NS_ERROR_ILLEGAL_DURING_SHUTDOWN);
+}
+
+// Nothing to initialize; always succeeds.
+nsresult nsProfiler::Init() { return NS_OK; }
+
+// Runs `aJsonLogObjectUpdater` on the gathering log's root JSON object.
+// Does nothing (the updater is not invoked) when there is no gathering log.
+// The updater must leave the root as a JSON object (asserted in debug builds).
+template <typename JsonLogObjectUpdater>
+void nsProfiler::Log(JsonLogObjectUpdater&& aJsonLogObjectUpdater) {
+  if (mGatheringLog) {
+    MOZ_ASSERT(mGatheringLog->isObject());
+    std::forward<JsonLogObjectUpdater>(aJsonLogObjectUpdater)(*mGatheringLog);
+    MOZ_ASSERT(mGatheringLog->isObject());
+  }
+}
+
+// Appends one event to the "events" array of the gathering log (creating the
+// array if it is missing or not an array). The event is itself a JSON array
+// whose first element is ProfilingLog::Timestamp(); `aJsonArrayAppender`
+// receives that array and appends the remaining elements.
+template <typename JsonArrayAppender>
+void nsProfiler::LogEvent(JsonArrayAppender&& aJsonArrayAppender) {
+  Log([&](Json::Value& aRoot) {
+    Json::Value& events = aRoot[Json::StaticString{"events"}];
+    if (!events.isArray()) {
+      events = Json::Value{Json::arrayValue};
+    }
+    Json::Value newEvent{Json::arrayValue};
+    newEvent.append(ProfilingLog::Timestamp());
+    std::forward<JsonArrayAppender>(aJsonArrayAppender)(newEvent);
+    MOZ_ASSERT(newEvent.isArray());
+    events.append(std::move(newEvent));
+  });
+}
+
+// Logs an event consisting of the timestamp and `aEventString`.
+// The string is wrapped in Json::StaticString, so - as the function name
+// says - callers are expected to pass a string literal.
+void nsProfiler::LogEventLiteralString(const char* aEventString) {
+  LogEvent([&](Json::Value& aEvent) {
+    aEvent.append(Json::StaticString{aEventString});
+  });
+}
+
+// Fills `aVector` with the raw `get()` pointer of every string in `aArray`.
+// The pointers are borrowed, not copied: `aArray` must outlive every use of
+// `aVector`. Returns NS_ERROR_OUT_OF_MEMORY if the vector cannot reserve
+// space, NS_OK otherwise.
+static nsresult FillVectorFromStringArray(Vector<const char*>& aVector,
+                                          const nsTArray<nsCString>& aArray) {
+  if (NS_WARN_IF(!aVector.reserve(aArray.Length()))) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  // Space was reserved above, so the appends below cannot fail.
+  for (const auto& entry : aArray) {
+    aVector.infallibleAppend(entry.get());
+  }
+  return NS_OK;
+}
+
+// Given a PromiseReturningFunction: () -> GenericPromise,
+// run the function, and return a JS Promise (through aPromise) that will be
+// resolved when the function's GenericPromise gets resolved.
+// Note: the JS Promise is resolved (never rejected) whether the GenericPromise
+// ends up resolved or rejected.
+template <typename PromiseReturningFunction>
+static nsresult RunFunctionAndConvertPromise(
+    JSContext* aCx, Promise** aPromise,
+    PromiseReturningFunction&& aPromiseReturningFunction) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  // A single callback handles both outcomes: the JS promise is resolved with
+  // `undefined` whether the GenericPromise was resolved or rejected.
+  std::forward<PromiseReturningFunction>(aPromiseReturningFunction)()->Then(
+      GetMainThreadSerialEventTarget(), __func__,
+      [promise](GenericPromise::ResolveOrRejectValue&&) {
+        promise->MaybeResolveWithUndefined();
+      });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Aborts any gathering in progress, then starts the profiler with the given
+// settings. `aDuration` is only forwarded when it is > 0. The returned JS
+// promise settles once profiler_start()'s promise does.
+NS_IMETHODIMP
+nsProfiler::StartProfiler(uint32_t aEntries, double aInterval,
+                          const nsTArray<nsCString>& aFeatures,
+                          const nsTArray<nsCString>& aFilters,
+                          uint64_t aActiveTabID, double aDuration,
+                          JSContext* aCx, Promise** aPromise) {
+  ResetGathering(NS_ERROR_DOM_ABORT_ERR);
+
+  // These vectors borrow the character pointers of aFeatures/aFilters.
+  Vector<const char*> featureStringVector;
+  nsresult rv = FillVectorFromStringArray(featureStringVector, aFeatures);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  uint32_t features = ParseFeaturesFromStringArray(
+      featureStringVector.begin(), featureStringVector.length());
+  Maybe<double> duration = aDuration > 0.0 ? Some(aDuration) : Nothing();
+
+  Vector<const char*> filterStringVector;
+  rv = FillVectorFromStringArray(filterStringVector, aFilters);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  // Capturing by reference is fine: the lambda is invoked synchronously
+  // inside RunFunctionAndConvertPromise.
+  return RunFunctionAndConvertPromise(aCx, aPromise, [&]() {
+    return profiler_start(PowerOfTwo32(aEntries), aInterval, features,
+                          filterStringVector.begin(),
+                          filterStringVector.length(), aActiveTabID, duration);
+  });
+}
+
+// Aborts any gathering in progress, then stops the profiler.
+NS_IMETHODIMP
+nsProfiler::StopProfiler(JSContext* aCx, Promise** aPromise) {
+  ResetGathering(NS_ERROR_DOM_ABORT_ERR);
+  return RunFunctionAndConvertPromise(aCx, aPromise,
+                                      []() { return profiler_stop(); });
+}
+
+// Reports profiler_is_paused() through `aIsPaused`.
+NS_IMETHODIMP
+nsProfiler::IsPaused(bool* aIsPaused) {
+  *aIsPaused = profiler_is_paused();
+  return NS_OK;
+}
+
+// Pauses the profiler; the JS promise follows profiler_pause()'s promise.
+NS_IMETHODIMP
+nsProfiler::Pause(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(aCx, aPromise,
+                                      []() { return profiler_pause(); });
+}
+
+// Resumes the profiler; the JS promise follows profiler_resume()'s promise.
+NS_IMETHODIMP
+nsProfiler::Resume(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(aCx, aPromise,
+                                      []() { return profiler_resume(); });
+}
+
+// Reports profiler_is_sampling_paused() through `aIsSamplingPaused`.
+NS_IMETHODIMP
+nsProfiler::IsSamplingPaused(bool* aIsSamplingPaused) {
+  *aIsSamplingPaused = profiler_is_sampling_paused();
+  return NS_OK;
+}
+
+// Pauses sampling only; the JS promise follows profiler_pause_sampling().
+NS_IMETHODIMP
+nsProfiler::PauseSampling(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(
+      aCx, aPromise, []() { return profiler_pause_sampling(); });
+}
+
+// Resumes sampling only; the JS promise follows profiler_resume_sampling().
+NS_IMETHODIMP
+nsProfiler::ResumeSampling(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(
+      aCx, aPromise, []() { return profiler_resume_sampling(); });
+}
+
+// Thin wrapper around profiler_clear_all_pages().
+NS_IMETHODIMP
+nsProfiler::ClearAllPages() {
+  profiler_clear_all_pages();
+  return NS_OK;
+}
+
+// Returns a JS promise that is resolved after this process has completed a
+// sampling and ProfilerParent::WaitOnePeriodicSampling() has settled. It is
+// rejected if the sampler just stopped or is paused, or if the callback could
+// not be registered at all.
+NS_IMETHODIMP
+nsProfiler::WaitOnePeriodicSampling(JSContext* aCx, Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  // The callback cannot officially own the promise RefPtr directly, because
+  // `Promise` doesn't support multi-threading, and the callback could destroy
+  // the promise in the sampler thread.
+  // `nsMainThreadPtrHandle` ensures that the promise can only be destroyed on
+  // the main thread. And the invocation from the Sampler thread immediately
+  // dispatches a task back to the main thread, to resolve/reject the promise.
+  // The lambda needs to be `mutable`, to allow moving-from
+  // `promiseHandleInSampler`.
+  if (!profiler_callback_after_sampling(
+          [promiseHandleInSampler = nsMainThreadPtrHandle<Promise>(
+               new nsMainThreadPtrHolder<Promise>(
+                   "WaitOnePeriodicSampling promise for Sampler", promise))](
+              SamplingState aSamplingState) mutable {
+            SchedulerGroup::Dispatch(
+                TaskCategory::Other,
+                NS_NewRunnableFunction(
+                    "nsProfiler::WaitOnePeriodicSampling result on main thread",
+                    [promiseHandleInMT = std::move(promiseHandleInSampler),
+                     aSamplingState]() mutable {
+                      switch (aSamplingState) {
+                        case SamplingState::JustStopped:
+                        case SamplingState::SamplingPaused:
+                          promiseHandleInMT->MaybeReject(NS_ERROR_FAILURE);
+                          break;
+
+                        case SamplingState::NoStackSamplingCompleted:
+                        case SamplingState::SamplingCompleted:
+                          // The parent process has successfully done a
+                          // sampling, check the child processes (if any).
+                          ProfilerParent::WaitOnePeriodicSampling()->Then(
+                              GetMainThreadSerialEventTarget(), __func__,
+                              [promiseHandleInMT =
+                                   std::move(promiseHandleInMT)](
+                                  GenericPromise::ResolveOrRejectValue&&) {
+                                promiseHandleInMT->MaybeResolveWithUndefined();
+                              });
+                          break;
+
+                        default:
+                          MOZ_ASSERT(false, "Unexpected SamplingState value");
+                          promiseHandleInMT->MaybeReject(
+                              NS_ERROR_DOM_UNKNOWN_ERR);
+                          break;
+                      }
+                    }));
+          })) {
+    // Callback was not added (e.g., profiler is not running) and will never be
+    // invoked, so we need to reject the promise here.
+    promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+  }
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Hands the buffer returned by profiler_get_profile() to the caller through
+// `aProfile`; ownership is released to the caller.
+NS_IMETHODIMP
+nsProfiler::GetProfile(double aSinceTime, char** aProfile) {
+  mozilla::UniquePtr<char[]> profile = profiler_get_profile(aSinceTime);
+  *aProfile = profile.release();
+  return NS_OK;
+}
+
+// Serializes this process's shared libraries (sorted by address) to a JSON
+// array, parses it, and returns the resulting JS object.
+NS_IMETHODIMP
+nsProfiler::GetSharedLibraries(JSContext* aCx,
+                               JS::MutableHandle<JS::Value> aResult) {
+  JS::Rooted<JS::Value> val(aCx);
+  {
+    JSONStringWriteFunc<nsCString> buffer;
+    JSONWriter w(buffer, JSONWriter::SingleLineStyle);
+    w.StartArrayElement();
+    SharedLibraryInfo sharedLibraryInfo = SharedLibraryInfo::GetInfoForSelf();
+    sharedLibraryInfo.SortByAddress();
+    AppendSharedLibraries(w, sharedLibraryInfo);
+    w.EndArray();
+    NS_ConvertUTF8toUTF16 buffer16(buffer.StringCRef());
+    MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx,
+                                 static_cast<const char16_t*>(buffer16.get()),
+                                 buffer16.Length(), &val));
+  }
+  // Check the value's type before using it as an object. The previous null
+  // test on `&val.toObject()` could never fire.
+  if (!val.isObject()) {
+    return NS_ERROR_FAILURE;
+  }
+  aResult.set(val);
+  return NS_OK;
+}
+
+// Serializes the active profiler configuration to JSON, parses it, and
+// returns the resulting JS object, or null when the JSON value is null.
+NS_IMETHODIMP
+nsProfiler::GetActiveConfiguration(JSContext* aCx,
+                                   JS::MutableHandle<JS::Value> aResult) {
+  JS::Rooted<JS::Value> jsValue(aCx);
+  {
+    JSONStringWriteFunc<nsCString> buffer;
+    JSONWriter writer(buffer, JSONWriter::SingleLineStyle);
+    profiler_write_active_configuration(writer);
+    NS_ConvertUTF8toUTF16 buffer16(buffer.StringCRef());
+    MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx,
+                                 static_cast<const char16_t*>(buffer16.get()),
+                                 buffer16.Length(), &jsValue));
+  }
+  if (jsValue.isNull()) {
+    aResult.setNull();
+    return NS_OK;
+  }
+  // Same reasoning as in GetSharedLibraries: test the type, not the pointer.
+  if (!jsValue.isObject()) {
+    return NS_ERROR_FAILURE;
+  }
+  aResult.set(jsValue);
+  return NS_OK;
+}
+
+// Thin wrapper around profiler_save_profile_to_file().
+NS_IMETHODIMP
+nsProfiler::DumpProfileToFile(const char* aFilename) {
+  profiler_save_profile_to_file(aFilename);
+  return NS_OK;
+}
+
+// Synchronously gets this process's profile as JSON text and returns it parsed
+// into a JS value. Fails if profiler_get_profile() returns no buffer.
+NS_IMETHODIMP
+nsProfiler::GetProfileData(double aSinceTime, JSContext* aCx,
+                           JS::MutableHandle<JS::Value> aResult) {
+  mozilla::UniquePtr<char[]> profile = profiler_get_profile(aSinceTime);
+  if (!profile) {
+    return NS_ERROR_FAILURE;
+  }
+
+  NS_ConvertUTF8toUTF16 js_string(nsDependentCString(profile.get()));
+  auto profile16 = static_cast<const char16_t*>(js_string.get());
+
+  JS::Rooted<JS::Value> val(aCx);
+  MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx, profile16, js_string.Length(), &val));
+
+  aResult.set(val);
+  return NS_OK;
+}
+
+// Starts gathering the profile and returns a JS promise that resolves with
+// the profile parsed from JSON into a JS value. The promise is rejected if
+// gathering fails or the JSON cannot be parsed. Fails synchronously when the
+// profiler is not active.
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsync(double aSinceTime, JSContext* aCx,
+                                Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+
+            // Now parse the JSON so that we resolve with a JS Object.
+            JS::Rooted<JS::Value> val(cx);
+            {
+              NS_ConvertUTF8toUTF16 js_string(aResult.mProfile);
+              if (!JS_ParseJSON(cx,
+                                static_cast<const char16_t*>(js_string.get()),
+                                js_string.Length(), &val)) {
+                if (!jsapi.HasException()) {
+                  promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+                } else {
+                  // Reject with the parse exception itself.
+                  JS::Rooted<JS::Value> exn(cx);
+                  DebugOnly<bool> gotException = jsapi.StealException(&exn);
+                  MOZ_ASSERT(gotException);
+
+                  jsapi.ClearException();
+                  promise->MaybeReject(exn);
+                }
+              } else {
+                promise->MaybeResolve(val);
+              }
+            }
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Like GetProfileDataAsync, but resolves with an ArrayBuffer holding the raw
+// bytes of the profile text instead of a parsed object.
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsArrayBuffer(double aSinceTime, JSContext* aCx,
+                                        Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+            JSObject* typedArray = dom::ArrayBuffer::Create(
+                cx, aResult.mProfile.Length(),
+                reinterpret_cast<const uint8_t*>(aResult.mProfile.Data()));
+            if (typedArray) {
+              JS::Rooted<JS::Value> val(cx, JS::ObjectValue(*typedArray));
+              promise->MaybeResolve(val);
+            } else {
+              promise->MaybeReject(NS_ERROR_OUT_OF_MEMORY);
+            }
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Compresses `aString` into `aOutBuff` as a single gzip stream.
+// Returns NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be allocated,
+// NS_ERROR_FAILURE on any zlib error, NS_OK otherwise (with `aOutBuff`
+// truncated to the compressed size).
+nsresult CompressString(const nsCString& aString,
+                        FallibleTArray<uint8_t>& aOutBuff) {
+  // Compress a buffer via zlib (as with `compress()`), but emit a
+  // gzip header as well. Like `compress()`, this is limited to 4GB in
+  // size, but that shouldn't be an issue for our purposes.
+  z_stream stream;
+  stream.zalloc = nullptr;
+  stream.zfree = nullptr;
+  stream.opaque = nullptr;
+  stream.next_in = (z_const Bytef*)aString.Data();
+  stream.avail_in = aString.Length();
+
+  // A windowBits of 31 is the default (15) plus 16 for emitting a
+  // gzip header; a memLevel of 8 is the default.
+  int zerr =
+      deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                   /* windowBits */ 31, /* memLevel */ 8, Z_DEFAULT_STRATEGY);
+  if (zerr != Z_OK) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Size the output with deflateBound() on the initialized stream, so the
+  // bound accounts for the gzip wrapper selected above. compressBound() only
+  // covers what compress() emits (the smaller zlib wrapper), which could leave
+  // the single deflate(Z_FINISH) pass a few bytes short on incompressible
+  // input.
+  const uLong outSizeBound = deflateBound(&stream, aString.Length());
+  if (!aOutBuff.SetLength(outSizeBound, fallible)) {
+    deflateEnd(&stream);
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  stream.next_out = (Bytef*)aOutBuff.Elements();
+  stream.avail_out = aOutBuff.Length();
+
+  zerr = deflate(&stream, Z_FINISH);
+  const uLong outSize = stream.total_out;
+  deflateEnd(&stream);
+
+  // Anything but Z_STREAM_END means the stream was not completed in one pass.
+  if (zerr != Z_STREAM_END) {
+    return NS_ERROR_FAILURE;
+  }
+
+  aOutBuff.TruncateLength(outSize);
+  return NS_OK;
+}
+
+// Starts gathering the profile and returns a JS promise that resolves with an
+// object `{ profile, additionalInformation }`: `profile` is an ArrayBuffer of
+// the gzipped profile text, `additionalInformation` is undefined when the
+// gathered result carries none. Fails synchronously when the profiler is not
+// active.
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsGzippedArrayBuffer(double aSinceTime,
+                                               JSContext* aCx,
+                                               Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            FallibleTArray<uint8_t> outBuff;
+            nsresult result = CompressString(aResult.mProfile, outBuff);
+
+            if (result != NS_OK) {
+              promise->MaybeReject(result);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+            // Get the profile typedArray.
+            JSObject* typedArray = dom::ArrayBuffer::Create(
+                cx, outBuff.Length(), outBuff.Elements());
+            if (!typedArray) {
+              promise->MaybeReject(NS_ERROR_OUT_OF_MEMORY);
+              return;
+            }
+            JS::Rooted<JS::Value> typedArrayValue(cx,
+                                                  JS::ObjectValue(*typedArray));
+            // Get the additional information object.
+            JS::Rooted<JS::Value> additionalInfoVal(cx);
+            if (aResult.mAdditionalInformation.isSome()) {
+              aResult.mAdditionalInformation->ToJSValue(cx, &additionalInfoVal);
+            } else {
+              additionalInfoVal.setUndefined();
+            }
+
+            // Create the return object. Both the allocation and the property
+            // definitions can fail; reject instead of using a null object or
+            // resolving with a half-filled one.
+            JS::Rooted<JSObject*> resultObj(cx, JS_NewPlainObject(cx));
+            if (!resultObj) {
+              jsapi.ClearException();
+              promise->MaybeReject(NS_ERROR_OUT_OF_MEMORY);
+              return;
+            }
+            if (!JS_SetProperty(cx, resultObj, "profile", typedArrayValue) ||
+                !JS_SetProperty(cx, resultObj, "additionalInformation",
+                                additionalInfoVal)) {
+              jsapi.ClearException();
+              promise->MaybeReject(NS_ERROR_FAILURE);
+              return;
+            }
+            promise->MaybeResolve(resultObj);
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Starts gathering the profile and returns a JS promise that resolves (with
+// undefined) once the profile text has been written to `aFilename`. The
+// promise is rejected if gathering fails, the profile is too large for a
+// single stream write, the file cannot be opened, or writing fails. Fails
+// synchronously when the profiler is not active.
+NS_IMETHODIMP
+nsProfiler::DumpProfileToFileAsync(const nsACString& aFilename,
+                                   double aSinceTime, JSContext* aCx,
+                                   Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  // Own a copy of the name: the callback runs after this call has returned.
+  nsCString filename(aFilename);
+
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [filename,
+           promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            if (aResult.mProfile.Length() >=
+                size_t(std::numeric_limits<std::streamsize>::max())) {
+              promise->MaybeReject(NS_ERROR_FILE_TOO_BIG);
+              return;
+            }
+
+            std::ofstream stream;
+            stream.open(filename.get());
+            if (!stream.is_open()) {
+              promise->MaybeReject(NS_ERROR_FILE_UNRECOGNIZED_PATH);
+              return;
+            }
+
+            stream.write(aResult.mProfile.get(),
+                         std::streamsize(aResult.mProfile.Length()));
+            stream.close();
+            // write() and close() report failure through the stream state;
+            // do not claim success if the data did not make it to the file.
+            if (stream.fail()) {
+              promise->MaybeReject(NS_ERROR_FAILURE);
+              return;
+            }
+
+            promise->MaybeResolveWithUndefined();
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Returns a JS promise that resolves with the tuple [addrs, index, buffer]
+// (Uint32Array, Uint32Array, Uint8Array) built from the symbol table for the
+// given debug path and breakpad ID. The promise is rejected if the lookup
+// fails or any of the JS objects cannot be created.
+NS_IMETHODIMP
+nsProfiler::GetSymbolTable(const nsACString& aDebugPath,
+                           const nsACString& aBreakpadID, JSContext* aCx,
+                           Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject =
+      xpc::NativeGlobal(JS::CurrentGlobalOrNull(aCx));
+
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  GetSymbolTableMozPromise(aDebugPath, aBreakpadID)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const SymbolTable& aSymbolTable) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+
+            JS::Rooted<JSObject*> addrsArray(
+                cx, dom::Uint32Array::Create(cx, aSymbolTable.mAddrs.Length(),
+                                             aSymbolTable.mAddrs.Elements()));
+            JS::Rooted<JSObject*> indexArray(
+                cx, dom::Uint32Array::Create(cx, aSymbolTable.mIndex.Length(),
+                                             aSymbolTable.mIndex.Elements()));
+            JS::Rooted<JSObject*> bufferArray(
+                cx, dom::Uint8Array::Create(cx, aSymbolTable.mBuffer.Length(),
+                                            aSymbolTable.mBuffer.Elements()));
+
+            // Only build the tuple when all three arrays exist, and only
+            // resolve when the tuple itself and every element store succeed.
+            JS::Rooted<JSObject*> tuple(cx);
+            if (addrsArray && indexArray && bufferArray) {
+              tuple = JS::NewArrayObject(cx, 3);
+            }
+            if (tuple && JS_SetElement(cx, tuple, 0, addrsArray) &&
+                JS_SetElement(cx, tuple, 1, indexArray) &&
+                JS_SetElement(cx, tuple, 2, bufferArray)) {
+              promise->MaybeResolve(tuple);
+            } else {
+              jsapi.ClearException();
+              promise->MaybeReject(NS_ERROR_FAILURE);
+            }
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Reports profiler_time() through `aElapsedTime`.
+NS_IMETHODIMP
+nsProfiler::GetElapsedTime(double* aElapsedTime) {
+  *aElapsedTime = profiler_time();
+  return NS_OK;
+}
+
+// Reports profiler_is_active() through `aIsActive`.
+NS_IMETHODIMP
+nsProfiler::IsActive(bool* aIsActive) {
+  *aIsActive = profiler_is_active();
+  return NS_OK;
+}
+
+static void GetArrayOfStringsForFeatures(uint32_t aFeatures,
+ nsTArray& aFeatureList) { +#define COUNT_IF_SET(n_, str_, Name_, desc_) \ + if (ProfilerFeature::Has##Name_(aFeatures)) { \ + len++; \ + } + + // Count the number of features in use. + uint32_t len = 0; + PROFILER_FOR_EACH_FEATURE(COUNT_IF_SET) + +#undef COUNT_IF_SET + + aFeatureList.SetCapacity(len); + +#define DUP_IF_SET(n_, str_, Name_, desc_) \ + if (ProfilerFeature::Has##Name_(aFeatures)) { \ + aFeatureList.AppendElement(str_); \ + } + + // Insert the strings for the features in use. + PROFILER_FOR_EACH_FEATURE(DUP_IF_SET) + +#undef DUP_IF_SET +} + +NS_IMETHODIMP +nsProfiler::GetFeatures(nsTArray& aFeatureList) { + uint32_t features = profiler_get_available_features(); + GetArrayOfStringsForFeatures(features, aFeatureList); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetAllFeatures(nsTArray& aFeatureList) { + GetArrayOfStringsForFeatures((uint32_t)-1, aFeatureList); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetBufferInfo(uint32_t* aCurrentPosition, uint32_t* aTotalSize, + uint32_t* aGeneration) { + MOZ_ASSERT(aCurrentPosition); + MOZ_ASSERT(aTotalSize); + MOZ_ASSERT(aGeneration); + Maybe info = profiler_get_buffer_info(); + if (info) { + *aCurrentPosition = info->mRangeEnd % info->mEntryCount; + *aTotalSize = info->mEntryCount; + *aGeneration = info->mRangeEnd / info->mEntryCount; + } else { + *aCurrentPosition = 0; + *aTotalSize = 0; + *aGeneration = 0; + } + return NS_OK; +} + +bool nsProfiler::SendProgressRequest(PendingProfile& aPendingProfile) { + RefPtr progressPromise = + ProfilerParent::RequestGatherProfileProgress(aPendingProfile.childPid); + if (!progressPromise) { + LOG("RequestGatherProfileProgress(%u) -> null!", + unsigned(aPendingProfile.childPid)); + LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Failed to send progress request to pid:"}); + aEvent.append(Json::Value::UInt64(aPendingProfile.childPid)); + }); + // Failed to send request. 
+ return false; + } + + DEBUG_LOG("RequestGatherProfileProgress(%u) sent...", + unsigned(aPendingProfile.childPid)); + LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{"Requested progress from pid:"}); + aEvent.append(Json::Value::UInt64(aPendingProfile.childPid)); + }); + aPendingProfile.lastProgressRequest = TimeStamp::Now(); + progressPromise->Then( + GetMainThreadSerialEventTarget(), __func__, + [self = RefPtr(this), + childPid = aPendingProfile.childPid](GatherProfileProgress&& aResult) { + if (!self->mGathering) { + return; + } + PendingProfile* pendingProfile = self->GetPendingProfile(childPid); + DEBUG_LOG( + "RequestGatherProfileProgress(%u) response: %.2f '%s' " + "(%u were pending, %s %u)", + unsigned(childPid), + ProportionValue::FromUnderlyingType( + aResult.progressProportionValueUnderlyingType()) + .ToDouble() * + 100.0, + aResult.progressLocation().Data(), + unsigned(self->mPendingProfiles.length()), + pendingProfile ? "including" : "excluding", unsigned(childPid)); + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Got response from pid, with progress:"}); + aEvent.append(Json::Value::UInt64(childPid)); + aEvent.append( + Json::Value{ProportionValue::FromUnderlyingType( + aResult.progressProportionValueUnderlyingType()) + .ToDouble() * + 100.0}); + }); + if (pendingProfile) { + // We have a progress report for a still-pending profile. + pendingProfile->lastProgressResponse = TimeStamp::Now(); + // Has it actually made progress? 
+ if (aResult.progressProportionValueUnderlyingType() != + pendingProfile->progressProportion.ToUnderlyingType()) { + pendingProfile->lastProgressChange = + pendingProfile->lastProgressResponse; + pendingProfile->progressProportion = + ProportionValue::FromUnderlyingType( + aResult.progressProportionValueUnderlyingType()); + pendingProfile->progressLocation = aResult.progressLocation(); + self->RestartGatheringTimer(); + } + } + }, + [self = RefPtr(this), childPid = aPendingProfile.childPid]( + ipc::ResponseRejectReason&& aReason) { + if (!self->mGathering) { + return; + } + PendingProfile* pendingProfile = self->GetPendingProfile(childPid); + LOG("RequestGatherProfileProgress(%u) rejection: %d " + "(%u were pending, %s %u)", + unsigned(childPid), (int)aReason, + unsigned(self->mPendingProfiles.length()), + pendingProfile ? "including" : "excluding", unsigned(childPid)); + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{ + "Got progress request rejection from pid, with reason:"}); + aEvent.append(Json::Value::UInt64(childPid)); + aEvent.append(Json::Value::UInt{static_cast(aReason)}); + }); + if (pendingProfile) { + // Failure response, assume the child process is gone. + MOZ_ASSERT(self->mPendingProfiles.begin() <= pendingProfile && + pendingProfile < self->mPendingProfiles.end()); + self->mPendingProfiles.erase(pendingProfile); + if (self->mPendingProfiles.empty()) { + // We've got all of the async profiles now. Let's finish off the + // profile and resolve the Promise. + self->FinishGathering(); + } + } + }); + return true; +} + +/* static */ void nsProfiler::GatheringTimerCallback(nsITimer* aTimer, + void* aClosure) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + nsCOMPtr profiler( + do_GetService("@mozilla.org/tools/profiler;1")); + if (!profiler) { + // No (more) profiler service. + return; + } + nsProfiler* self = static_cast(profiler.get()); + if (self != aClosure) { + // Different service object!? 
+ return; + } + if (aTimer != self->mGatheringTimer) { + // This timer was cancelled after this callback was queued. + return; + } + + bool progressWasMade = false; + + // Going backwards, it's easier and cheaper to erase elements if needed. + for (auto iPlus1 = self->mPendingProfiles.length(); iPlus1 != 0; --iPlus1) { + PendingProfile& pendingProfile = self->mPendingProfiles[iPlus1 - 1]; + + bool needToSendProgressRequest = false; + if (pendingProfile.lastProgressRequest.IsNull()) { + DEBUG_LOG("GatheringTimerCallback() - child %u: No data yet", + unsigned(pendingProfile.childPid)); + // First time going through the list, send an initial progress request. + needToSendProgressRequest = true; + // We pretend that progress was made, so we don't give up yet. + progressWasMade = true; + } else if (pendingProfile.lastProgressResponse.IsNull()) { + LOG("GatheringTimerCallback() - child %u: Waiting for first response", + unsigned(pendingProfile.childPid)); + // Still waiting for the first response, no progress made here, don't send + // another request. + } else if (pendingProfile.lastProgressResponse <= + pendingProfile.lastProgressRequest) { + LOG("GatheringTimerCallback() - child %u: Waiting for response", + unsigned(pendingProfile.childPid)); + // Still waiting for a response to the last request, no progress made + // here, don't send another request. + } else if (pendingProfile.lastProgressChange.IsNull()) { + LOG("GatheringTimerCallback() - child %u: Still waiting for first change", + unsigned(pendingProfile.childPid)); + // Still waiting for the first change, no progress made here, but send a + // new request. + needToSendProgressRequest = true; + } else if (pendingProfile.lastProgressRequest < + pendingProfile.lastProgressChange) { + DEBUG_LOG("GatheringTimerCallback() - child %u: Recent change", + unsigned(pendingProfile.childPid)); + // We have a recent change, progress was made. 
+ needToSendProgressRequest = true; + progressWasMade = true; + } else { + LOG("GatheringTimerCallback() - child %u: No recent change", + unsigned(pendingProfile.childPid)); + needToSendProgressRequest = true; + } + + // And send a new progress request. + if (needToSendProgressRequest) { + if (!self->SendProgressRequest(pendingProfile)) { + // Failed to even send the request, consider this process gone. + self->mPendingProfiles.erase(&pendingProfile); + LOG("... Failed to send progress request"); + } else { + DEBUG_LOG("... Sent progress request"); + } + } else { + DEBUG_LOG("... No progress request"); + } + } + + if (self->mPendingProfiles.empty()) { + // We've got all of the async profiles now. Let's finish off the profile + // and resolve the Promise. + self->FinishGathering(); + return; + } + + // Not finished yet. + + if (progressWasMade) { + // We made some progress, just restart the timer. + DEBUG_LOG("GatheringTimerCallback() - Progress made, restart timer"); + self->RestartGatheringTimer(); + return; + } + + DEBUG_LOG("GatheringTimerCallback() - Timeout!"); + self->mGatheringTimer = nullptr; + if (!profiler_is_active() || !self->mGathering) { + // Not gathering anymore. + return; + } + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{ + "No progress made recently, giving up; pending pids:"}); + for (const PendingProfile& pendingProfile : self->mPendingProfiles) { + aEvent.append(Json::Value::UInt64(pendingProfile.childPid)); + } + }); + NS_WARNING("Profiler failed to gather profiles from all sub-processes"); + // We have really reached a timeout while gathering, finish now. + // TODO: Add information about missing processes. 
+ self->FinishGathering(); +} + +void nsProfiler::RestartGatheringTimer() { + if (mGatheringTimer) { + uint32_t delayMs = 0; + const nsresult r = mGatheringTimer->GetDelay(&delayMs); + mGatheringTimer->Cancel(); + if (NS_FAILED(r) || delayMs == 0 || + NS_FAILED(mGatheringTimer->InitWithNamedFuncCallback( + GatheringTimerCallback, this, delayMs, + nsITimer::TYPE_ONE_SHOT_LOW_PRIORITY, + "nsProfilerGatheringTimer"))) { + // Can't restart the timer, so we can't wait any longer. + FinishGathering(); + } + } +} + +nsProfiler::PendingProfile* nsProfiler::GetPendingProfile( + base::ProcessId aChildPid) { + for (PendingProfile& pendingProfile : mPendingProfiles) { + if (pendingProfile.childPid == aChildPid) { + return &pendingProfile; + } + } + return nullptr; +} + +void nsProfiler::GatheredOOPProfile( + base::ProcessId aChildPid, const nsACString& aProfile, + mozilla::Maybe&& + aAdditionalInformation) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + if (!profiler_is_active()) { + return; + } + + if (!mGathering) { + // If we're not actively gathering, then we don't actually care that we + // gathered a profile here. This can happen for processes that exit while + // profiling. + return; + } + + MOZ_RELEASE_ASSERT(mWriter.isSome(), + "Should always have a writer if mGathering is true"); + + // Combine all the additional information into a single struct. + if (aAdditionalInformation.isSome()) { + mProfileGenerationAdditionalInformation->Append( + std::move(*aAdditionalInformation)); + } + + if (!aProfile.IsEmpty()) { + if (mWriter->ChunkedWriteFunc().Length() + aProfile.Length() < + scLengthAccumulationThreshold) { + // TODO: Remove PromiseFlatCString, see bug 1657033. 
+ mWriter->Splice(PromiseFlatCString(aProfile)); + } else { + LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Discarded child profile that would make the " + "full profile too big, pid and size:"}); + aEvent.append(Json::Value::UInt64(aChildPid)); + aEvent.append(Json::Value::UInt64{aProfile.Length()}); + }); + } + } + + if (PendingProfile* pendingProfile = GetPendingProfile(aChildPid); + pendingProfile) { + mPendingProfiles.erase(pendingProfile); + + if (mPendingProfiles.empty()) { + // We've got all of the async profiles now. Let's finish off the profile + // and resolve the Promise. + FinishGathering(); + } + } + + // Not finished yet, restart the timer to let any remaining child enough time + // to do their profile-streaming. + RestartGatheringTimer(); +} + +RefPtr +nsProfiler::GetProfileDataAsGzippedArrayBufferAndroid(double aSinceTime) { + MOZ_ASSERT(NS_IsMainThread()); + + if (!profiler_is_active()) { + return GatheringPromiseAndroid::CreateAndReject(NS_ERROR_FAILURE, __func__); + } + + return StartGathering(aSinceTime) + ->Then( + GetMainThreadSerialEventTarget(), __func__, + [](const mozilla::ProfileAndAdditionalInformation& aResult) { + FallibleTArray outBuff; + nsresult result = CompressString(aResult.mProfile, outBuff); + if (result != NS_OK) { + return GatheringPromiseAndroid::CreateAndReject(result, __func__); + } + return GatheringPromiseAndroid::CreateAndResolve(std::move(outBuff), + __func__); + }, + [](nsresult aRv) { + return GatheringPromiseAndroid::CreateAndReject(aRv, __func__); + }); +} + +RefPtr nsProfiler::StartGathering( + double aSinceTime) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + if (mGathering) { + // If we're already gathering, return a rejected promise - this isn't + // going to end well. 
+ return GatheringPromise::CreateAndReject(NS_ERROR_NOT_AVAILABLE, __func__); + } + + mGathering = true; + mGatheringLog = mozilla::MakeUnique(Json::objectValue); + (*mGatheringLog)[Json::StaticString{ + "profileGatheringLogBegin" TIMESTAMP_JSON_SUFFIX}] = + ProfilingLog::Timestamp(); + + if (mGatheringTimer) { + mGatheringTimer->Cancel(); + mGatheringTimer = nullptr; + } + + // Start building shared library info starting from the current process. + mProfileGenerationAdditionalInformation.emplace( + SharedLibraryInfo::GetInfoForSelf()); + + // Request profiles from the other processes. This will trigger asynchronous + // calls to ProfileGatherer::GatheredOOPProfile as the profiles arrive. + // + // Do this before the call to profiler_stream_json_for_this_process() because + // that call is slow and we want to let the other processes grab their + // profiles as soon as possible. + nsTArray profiles = + ProfilerParent::GatherProfiles(); + + MOZ_ASSERT(mPendingProfiles.empty()); + if (!mPendingProfiles.reserve(profiles.Length())) { + ResetGathering(NS_ERROR_OUT_OF_MEMORY); + return GatheringPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + + mFailureLatchSource.emplace(); + mWriter.emplace(*mFailureLatchSource); + + UniquePtr service = + profiler_code_address_service_for_presymbolication(); + + // Start building up the JSON result and grab the profile from this process. + mWriter->Start(); + auto rv = profiler_stream_json_for_this_process(*mWriter, aSinceTime, + /* aIsShuttingDown */ false, + service.get()); + if (rv.isErr()) { + // The profiler is inactive. This either means that it was inactive even + // at the time that ProfileGatherer::Start() was called, or that it was + // stopped on a different thread since that call. Either way, we need to + // reject the promise and stop gathering. 
+ ResetGathering(NS_ERROR_NOT_AVAILABLE); + return GatheringPromise::CreateAndReject(NS_ERROR_NOT_AVAILABLE, __func__); + } + + LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Generated parent process profile, size:"}); + aEvent.append(Json::Value::UInt64{mWriter->ChunkedWriteFunc().Length()}); + }); + + mWriter->StartArrayProperty("processes"); + + // If we have any process exit profiles, add them immediately. + if (Vector exitProfiles = profiler_move_exit_profiles(); + !exitProfiles.empty()) { + for (auto& exitProfile : exitProfiles) { + if (!exitProfile.IsEmpty()) { + if (exitProfile[0] == '*') { + LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Exit non-profile with error message:"}); + aEvent.append(exitProfile.Data() + 1); + }); + } else if (mWriter->ChunkedWriteFunc().Length() + exitProfile.Length() < + scLengthAccumulationThreshold) { + mWriter->Splice(exitProfile); + LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{"Added exit profile with size:"}); + aEvent.append(Json::Value::UInt64{exitProfile.Length()}); + }); + } else { + LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Discarded an exit profile that would make " + "the full profile too big, size:"}); + aEvent.append(Json::Value::UInt64{exitProfile.Length()}); + }); + } + } + } + + LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{ + "Processed all exit profiles, total size so far:"}); + aEvent.append(Json::Value::UInt64{mWriter->ChunkedWriteFunc().Length()}); + }); + } else { + // There are no pending profiles, we're already done. + LogEventLiteralString("No exit profiles."); + } + + mPromiseHolder.emplace(); + RefPtr promise = mPromiseHolder->Ensure(__func__); + + // Keep the array property "processes" and the root object in mWriter open + // until FinishGathering() is called. As profiles from the other processes + // come in, they will be inserted and end up in the right spot. 
+ // FinishGathering() will close the array and the root object. + + if (!profiles.IsEmpty()) { + // There *are* pending profiles, let's add handlers for their promises. + + // This timeout value is used to monitor progress while gathering child + // profiles. The timer will be restarted after we receive a response with + // any progress. + constexpr uint32_t cMinChildTimeoutS = 1u; // 1 second minimum and default. + constexpr uint32_t cMaxChildTimeoutS = 60u; // 1 minute max. + uint32_t childTimeoutS = Preferences::GetUint( + "devtools.performance.recording.child.timeout_s", cMinChildTimeoutS); + if (childTimeoutS < cMinChildTimeoutS) { + childTimeoutS = cMinChildTimeoutS; + } else if (childTimeoutS > cMaxChildTimeoutS) { + childTimeoutS = cMaxChildTimeoutS; + } + const uint32_t childTimeoutMs = childTimeoutS * PR_MSEC_PER_SEC; + Unused << NS_NewTimerWithFuncCallback( + getter_AddRefs(mGatheringTimer), GatheringTimerCallback, this, + childTimeoutMs, nsITimer::TYPE_ONE_SHOT_LOW_PRIORITY, + "nsProfilerGatheringTimer", GetMainThreadSerialEventTarget()); + + MOZ_ASSERT(mPendingProfiles.capacity() >= profiles.Length()); + for (const auto& profile : profiles) { + mPendingProfiles.infallibleAppend(PendingProfile{profile.childPid}); + LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{"Waiting for pending profile, pid:"}); + aEvent.append(Json::Value::UInt64(profile.childPid)); + }); + profile.profilePromise->Then( + GetMainThreadSerialEventTarget(), __func__, + [self = RefPtr(this), childPid = profile.childPid]( + IPCProfileAndAdditionalInformation&& aResult) { + PendingProfile* pendingProfile = self->GetPendingProfile(childPid); + mozilla::ipc::Shmem profileShmem = aResult.profileShmem(); + LOG("GatherProfile(%u) response: %u bytes (%u were pending, %s %u)", + unsigned(childPid), unsigned(profileShmem.Size()), + unsigned(self->mPendingProfiles.length()), + pendingProfile ? 
"including" : "excluding", unsigned(childPid)); + if (profileShmem.IsReadable()) { + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Got profile from pid, with size:"}); + aEvent.append(Json::Value::UInt64(childPid)); + aEvent.append(Json::Value::UInt64{profileShmem.Size()}); + }); + const nsDependentCSubstring profileString( + profileShmem.get(), profileShmem.Size() - 1); + if (profileString.IsEmpty() || profileString[0] != '*') { + self->GatheredOOPProfile( + childPid, profileString, + std::move(aResult.additionalInformation())); + } else { + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{ + "Child non-profile from pid, with error message:"}); + aEvent.append(Json::Value::UInt64(childPid)); + aEvent.append(profileString.Data() + 1); + }); + self->GatheredOOPProfile(childPid, ""_ns, Nothing()); + } + } else { + // This can happen if the child failed to allocate + // the Shmem (or maliciously sent an invalid Shmem). + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{"Got failure from pid:"}); + aEvent.append(Json::Value::UInt64(childPid)); + }); + self->GatheredOOPProfile(childPid, ""_ns, Nothing()); + } + }, + [self = RefPtr(this), + childPid = profile.childPid](ipc::ResponseRejectReason&& aReason) { + PendingProfile* pendingProfile = self->GetPendingProfile(childPid); + LOG("GatherProfile(%u) rejection: %d (%u were pending, %s %u)", + unsigned(childPid), (int)aReason, + unsigned(self->mPendingProfiles.length()), + pendingProfile ? "including" : "excluding", unsigned(childPid)); + self->LogEvent([&](Json::Value& aEvent) { + aEvent.append( + Json::StaticString{"Got rejection from pid, with reason:"}); + aEvent.append(Json::Value::UInt64(childPid)); + aEvent.append(Json::Value::UInt{static_cast(aReason)}); + }); + self->GatheredOOPProfile(childPid, ""_ns, Nothing()); + }); + } + } else { + // There are no pending profiles, we're already done. 
+ LogEventLiteralString("No pending child profiles."); + FinishGathering(); + } + + return promise; +} + +RefPtr nsProfiler::GetSymbolTableMozPromise( + const nsACString& aDebugPath, const nsACString& aBreakpadID) { + MozPromiseHolder promiseHolder; + RefPtr promise = promiseHolder.Ensure(__func__); + + if (!mSymbolTableThread) { + nsresult rv = NS_NewNamedThread("ProfSymbolTable", + getter_AddRefs(mSymbolTableThread)); + if (NS_WARN_IF(NS_FAILED(rv))) { + promiseHolder.Reject(NS_ERROR_FAILURE, __func__); + return promise; + } + } + + nsresult rv = mSymbolTableThread->Dispatch(NS_NewRunnableFunction( + "nsProfiler::GetSymbolTableMozPromise runnable on ProfSymbolTable thread", + [promiseHolder = std::move(promiseHolder), + debugPath = nsCString(aDebugPath), + breakpadID = nsCString(aBreakpadID)]() mutable { + AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING("profiler_get_symbol_table", + OTHER, debugPath); + SymbolTable symbolTable; + bool succeeded = profiler_get_symbol_table( + debugPath.get(), breakpadID.get(), &symbolTable); + if (succeeded) { + promiseHolder.Resolve(std::move(symbolTable), __func__); + } else { + promiseHolder.Reject(NS_ERROR_FAILURE, __func__); + } + })); + + if (NS_WARN_IF(NS_FAILED(rv))) { + // Get-symbol task was not dispatched and therefore won't fulfill the + // promise, we must reject the promise now. + promiseHolder.Reject(NS_ERROR_FAILURE, __func__); + } + + return promise; +} + +void nsProfiler::FinishGathering() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + MOZ_RELEASE_ASSERT(mWriter.isSome()); + MOZ_RELEASE_ASSERT(mPromiseHolder.isSome()); + MOZ_RELEASE_ASSERT(mProfileGenerationAdditionalInformation.isSome()); + + // Close the "processes" array property. 
+ mWriter->EndArray(); + + if (mGatheringLog) { + LogEvent([&](Json::Value& aEvent) { + aEvent.append(Json::StaticString{"Finished gathering, total size:"}); + aEvent.append(Json::Value::UInt64{mWriter->ChunkedWriteFunc().Length()}); + }); + (*mGatheringLog)[Json::StaticString{ + "profileGatheringLogEnd" TIMESTAMP_JSON_SUFFIX}] = + ProfilingLog::Timestamp(); + mWriter->StartObjectProperty("profileGatheringLog"); + { + nsAutoCString pid; + pid.AppendInt(int64_t(profiler_current_process_id().ToNumber())); + Json::String logString = ToCompactString(*mGatheringLog); + mGatheringLog = nullptr; + mWriter->SplicedJSONProperty(pid, logString); + } + mWriter->EndObject(); + } + + // Close the root object of the generated JSON. + mWriter->End(); + + if (const char* failure = mWriter->GetFailure(); failure) { +#ifndef ANDROID + fprintf(stderr, "JSON generation failure: %s", failure); +#else + __android_log_print(ANDROID_LOG_INFO, "GeckoProfiler", + "JSON generation failure: %s", failure); +#endif + NS_WARNING("Error during JSON generation, probably OOM."); + ResetGathering(NS_ERROR_OUT_OF_MEMORY); + return; + } + + // And try to resolve the promise with the profile JSON. 
+ const size_t len = mWriter->ChunkedWriteFunc().Length(); + if (len >= scLengthMax) { + NS_WARNING("Profile JSON is too big to fit in a string."); + ResetGathering(NS_ERROR_FILE_TOO_BIG); + return; + } + + nsCString result; + if (!result.SetLength(len, fallible)) { + NS_WARNING("Cannot allocate a string for the Profile JSON."); + ResetGathering(NS_ERROR_OUT_OF_MEMORY); + return; + } + MOZ_ASSERT(*(result.Data() + len) == '\0', + "We expected a null at the end of the string buffer, to be " + "rewritten by CopyDataIntoLazilyAllocatedBuffer"); + + char* const resultBeginWriting = result.BeginWriting(); + if (!resultBeginWriting) { + NS_WARNING("Cannot access the string to write the Profile JSON."); + ResetGathering(NS_ERROR_CACHE_WRITE_ACCESS_DENIED); + return; + } + + // Here, we have enough space reserved in `result`, starting at + // `resultBeginWriting`, copy the JSON profile there. + if (!mWriter->ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer( + [&](size_t aBufferLen) -> char* { + MOZ_RELEASE_ASSERT(aBufferLen == len + 1); + return resultBeginWriting; + })) { + NS_WARNING("Could not copy profile JSON, probably OOM."); + ResetGathering(NS_ERROR_FILE_TOO_BIG); + return; + } + MOZ_ASSERT(*(result.Data() + len) == '\0', + "We still expected a null at the end of the string buffer"); + + mProfileGenerationAdditionalInformation->FinishGathering(); + mPromiseHolder->Resolve( + ProfileAndAdditionalInformation{ + std::move(result), + std::move(*mProfileGenerationAdditionalInformation)}, + __func__); + + ResetGathering(NS_ERROR_UNEXPECTED); +} + +void nsProfiler::ResetGathering(nsresult aPromiseRejectionIfPending) { + // If we have an unfulfilled Promise in flight, we should reject it before + // destroying the promise holder. 
+ if (mPromiseHolder.isSome()) { + mPromiseHolder->RejectIfExists(aPromiseRejectionIfPending, __func__); + mPromiseHolder.reset(); + } + mPendingProfiles.clearAndFree(); + mGathering = false; + mGatheringLog = nullptr; + if (mGatheringTimer) { + mGatheringTimer->Cancel(); + mGatheringTimer = nullptr; + } + mWriter.reset(); + mFailureLatchSource.reset(); + mProfileGenerationAdditionalInformation.reset(); +} diff --git a/tools/profiler/gecko/nsProfiler.h b/tools/profiler/gecko/nsProfiler.h new file mode 100644 index 0000000000..3757df3079 --- /dev/null +++ b/tools/profiler/gecko/nsProfiler.h @@ -0,0 +1,117 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsProfiler_h +#define nsProfiler_h + +#include "base/process.h" +#include "mozilla/Attributes.h" +#include "mozilla/Maybe.h" +#include "mozilla/MozPromise.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProportionValue.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "nsIProfiler.h" +#include "nsITimer.h" +#include "nsServiceManagerUtils.h" +#include "ProfilerCodeAddressService.h" +#include "ProfileAdditionalInformation.h" + +namespace Json { +class Value; +} // namespace Json + +class nsProfiler final : public nsIProfiler { + public: + nsProfiler(); + + NS_DECL_ISUPPORTS + NS_DECL_NSIPROFILER + + nsresult Init(); + + static nsProfiler* GetOrCreate() { + nsCOMPtr iprofiler = + do_GetService("@mozilla.org/tools/profiler;1"); + return static_cast(iprofiler.get()); + } + + private: + ~nsProfiler(); + + using GatheringPromiseAndroid = + mozilla::MozPromise, nsresult, true>; + using GatheringPromise = + mozilla::MozPromise; + using SymbolTablePromise = + 
mozilla::MozPromise; + + RefPtr StartGathering(double aSinceTime); + void GatheredOOPProfile( + base::ProcessId aChildPid, const nsACString& aProfile, + mozilla::Maybe&& + aAdditionalInformation); + void FinishGathering(); + void ResetGathering(nsresult aPromiseRejectionIfPending); + static void GatheringTimerCallback(nsITimer* aTimer, void* aClosure); + void RestartGatheringTimer(); + + RefPtr GetSymbolTableMozPromise( + const nsACString& aDebugPath, const nsACString& aBreakpadID); + + struct ExitProfile { + nsCString mJSON; + uint64_t mBufferPositionAtGatherTime; + }; + + struct PendingProfile { + base::ProcessId childPid; + + mozilla::ProportionValue progressProportion; + nsCString progressLocation; + + mozilla::TimeStamp lastProgressRequest; + mozilla::TimeStamp lastProgressResponse; + mozilla::TimeStamp lastProgressChange; + + explicit PendingProfile(base::ProcessId aChildPid) : childPid(aChildPid) {} + }; + + PendingProfile* GetPendingProfile(base::ProcessId aChildPid); + // Returns false if the request could not be sent. + bool SendProgressRequest(PendingProfile& aPendingProfile); + + // If the log is active, call aJsonLogObjectUpdater(Json::Value&) on the log's + // root object. + template + void Log(JsonLogObjectUpdater&& aJsonLogObjectUpdater); + // If the log is active, call aJsonArrayAppender(Json::Value&) on a Json + // array that already contains a timestamp, and to which event-related + // elements may be appended. + template + void LogEvent(JsonArrayAppender&& aJsonArrayAppender); + void LogEventLiteralString(const char* aEventString); + + // These fields are all related to profile gathering. 
+ mozilla::Vector mExitProfiles; + mozilla::Maybe> mPromiseHolder; + nsCOMPtr mSymbolTableThread; + mozilla::Maybe mFailureLatchSource; + mozilla::Maybe mWriter; + mozilla::Maybe + mProfileGenerationAdditionalInformation; + mozilla::Vector mPendingProfiles; + bool mGathering; + nsCOMPtr mGatheringTimer; + // Supplemental log to the profiler's "profilingLog" (which has already been + // completed in JSON profiles that are gathered). + mozilla::UniquePtr mGatheringLog; +}; + +#endif // nsProfiler_h diff --git a/tools/profiler/gecko/nsProfilerCIID.h b/tools/profiler/gecko/nsProfilerCIID.h new file mode 100644 index 0000000000..3df44596b1 --- /dev/null +++ b/tools/profiler/gecko/nsProfilerCIID.h @@ -0,0 +1,16 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsProfilerCIID_h__ +#define nsProfilerCIID_h__ + +#define NS_PROFILER_CID \ + { \ + 0x25db9b8e, 0x8123, 0x4de1, { \ + 0xb6, 0x6d, 0x8b, 0xbb, 0xed, 0xf2, 0xcd, 0xf4 \ + } \ + } + +#endif diff --git a/tools/profiler/gecko/nsProfilerStartParams.cpp b/tools/profiler/gecko/nsProfilerStartParams.cpp new file mode 100644 index 0000000000..dd7c3f4ab7 --- /dev/null +++ b/tools/profiler/gecko/nsProfilerStartParams.cpp @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsProfilerStartParams.h" +#include "ipc/IPCMessageUtils.h" + +NS_IMPL_ISUPPORTS(nsProfilerStartParams, nsIProfilerStartParams) + +nsProfilerStartParams::nsProfilerStartParams( + uint32_t aEntries, const mozilla::Maybe& aDuration, + double aInterval, uint32_t aFeatures, nsTArray&& aFilters, + uint64_t aActiveTabID) + : mEntries(aEntries), + mDuration(aDuration), + mInterval(aInterval), + mFeatures(aFeatures), + mFilters(std::move(aFilters)), + mActiveTabID(aActiveTabID) {} + +nsProfilerStartParams::~nsProfilerStartParams() {} + +NS_IMETHODIMP +nsProfilerStartParams::GetEntries(uint32_t* aEntries) { + NS_ENSURE_ARG_POINTER(aEntries); + *aEntries = mEntries; + return NS_OK; +} + +NS_IMETHODIMP +nsProfilerStartParams::GetDuration(double* aDuration) { + NS_ENSURE_ARG_POINTER(aDuration); + if (mDuration) { + *aDuration = *mDuration; + } else { + *aDuration = 0; + } + return NS_OK; +} + +NS_IMETHODIMP +nsProfilerStartParams::GetInterval(double* aInterval) { + NS_ENSURE_ARG_POINTER(aInterval); + *aInterval = mInterval; + return NS_OK; +} + +NS_IMETHODIMP +nsProfilerStartParams::GetFeatures(uint32_t* aFeatures) { + NS_ENSURE_ARG_POINTER(aFeatures); + *aFeatures = mFeatures; + return NS_OK; +} + +const nsTArray& nsProfilerStartParams::GetFilters() { + return mFilters; +} + +NS_IMETHODIMP +nsProfilerStartParams::GetActiveTabID(uint64_t* aActiveTabID) { + NS_ENSURE_ARG_POINTER(aActiveTabID); + *aActiveTabID = mActiveTabID; + return NS_OK; +} diff --git a/tools/profiler/gecko/nsProfilerStartParams.h b/tools/profiler/gecko/nsProfilerStartParams.h new file mode 100644 index 0000000000..25c2b5082f --- /dev/null +++ b/tools/profiler/gecko/nsProfilerStartParams.h @@ -0,0 +1,36 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _NSPROFILERSTARTPARAMS_H_ +#define _NSPROFILERSTARTPARAMS_H_ + +#include "nsIProfiler.h" +#include "nsString.h" +#include "nsTArray.h" + +class nsProfilerStartParams : public nsIProfilerStartParams { + public: + // This class can be used on multiple threads. For example, it's used for the + // observer notification from profiler_start, which can run on any thread but + // posts the notification to the main thread. + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIPROFILERSTARTPARAMS + + nsProfilerStartParams(uint32_t aEntries, + const mozilla::Maybe& aDuration, + double aInterval, uint32_t aFeatures, + nsTArray&& aFilters, uint64_t aActiveTabID); + + private: + virtual ~nsProfilerStartParams(); + uint32_t mEntries; + mozilla::Maybe mDuration; + double mInterval; + uint32_t mFeatures; + nsTArray mFilters; + uint64_t mActiveTabID; +}; + +#endif diff --git a/tools/profiler/lul/AutoObjectMapper.cpp b/tools/profiler/lul/AutoObjectMapper.cpp new file mode 100644 index 0000000000..f7489fbfee --- /dev/null +++ b/tools/profiler/lul/AutoObjectMapper.cpp @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include +#include + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "PlatformMacros.h" +#include "AutoObjectMapper.h" + +// A helper function for creating failure error messages in +// AutoObjectMapper*::Map. 
+static void failedToMessage(void (*aLog)(const char*), const char* aHowFailed, + std::string aFileName) { + // Format the failure text into a fixed-size buffer and hand it to |aLog|. + char buf[300]; + SprintfLiteral(buf, "AutoObjectMapper::Map: Failed to %s \'%s\'", aHowFailed, + aFileName.c_str()); + buf[sizeof(buf) - 1] = 0; + aLog(buf); +} + +// Start out holding no mapping; Map() does the actual work. +AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void (*aLog)(const char*)) + : mImage(nullptr), mSize(0), mLog(aLog), mIsMapped(false) {} + +// Unmap the object if, and only if, an earlier Map() call succeeded. +AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() { + if (!mIsMapped) { + // There's nothing to do. + MOZ_ASSERT(!mImage); + MOZ_ASSERT(mSize == 0); + return; + } + MOZ_ASSERT(mSize > 0); + // The following assertion doesn't necessarily have to be true, + // but we assume (reasonably enough) that no mmap facility would + // be crazy enough to map anything at page zero. + MOZ_ASSERT(mImage); + munmap(mImage, mSize); +} + +// Map |fileName| read-only (PROT_READ, MAP_SHARED). On success the mapping's +// address and size are stored in *start and *length (and remembered in +// mImage/mSize for the destructor) and true is returned. On any failure a +// message is sent to mLog, *start and *length are left untouched, and false +// is returned. Zero-sized files are treated as a failure. +bool AutoObjectMapperPOSIX::Map(/*OUT*/ void** start, /*OUT*/ size_t* length, + std::string fileName) { + MOZ_ASSERT(!mIsMapped); + // The assertion above only checks in debug builds. Refuse at run time as + // well: mapping a second time would overwrite mImage/mSize and leak the + // earlier mapping, and the header promises that such calls fail. + if (mIsMapped) { + failedToMessage(mLog, "map (an object is already mapped)", fileName); + return false; + } + + int fd = open(fileName.c_str(), O_RDONLY); + if (fd == -1) { + failedToMessage(mLog, "open", fileName); + return false; + } + + struct stat st; + int err = fstat(fd, &st); + size_t sz = (err == 0) ? st.st_size : 0; + if (err != 0 || sz == 0) { + failedToMessage(mLog, "fstat", fileName); + close(fd); + return false; + } + + void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0); + if (image == MAP_FAILED) { + failedToMessage(mLog, "mmap", fileName); + close(fd); + return false; + } + + // The mapping stays valid after the descriptor is closed. + close(fd); + mIsMapped = true; + mImage = *start = image; + mSize = *length = sz; + return true; +} diff --git a/tools/profiler/lul/AutoObjectMapper.h b/tools/profiler/lul/AutoObjectMapper.h new file mode 100644 index 0000000000..f63aa43e0e --- /dev/null +++ b/tools/profiler/lul/AutoObjectMapper.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AutoObjectMapper_h +#define AutoObjectMapper_h + +#include + +#include "mozilla/Attributes.h" +#include "PlatformMacros.h" + +// A (nearly-) RAII class that maps an object in and then unmaps it on +// destruction. This base class version uses the "normal" POSIX +// functions: open, fstat, close, mmap, munmap. + +class MOZ_STACK_CLASS AutoObjectMapperPOSIX { + public: + // The constructor does not attempt to map the file, because that + // might fail. Instead, once the object has been constructed, + // call Map() to attempt the mapping. There is no corresponding + // Unmap() since the unmapping is done in the destructor. Failure + // messages are sent to |aLog|. + explicit AutoObjectMapperPOSIX(void (*aLog)(const char*)); + + // Unmap the file on destruction of this object. + ~AutoObjectMapperPOSIX(); + + // Map |fileName| into the address space and return the mapping + // extents. If the file is zero sized this will fail. The file is + // mapped read-only and shared (PROT_READ, MAP_SHARED). Returns true + // iff the mapping succeeded, in which case *start and *length hold + // its extent; on failure they are left untouched. Map must not be + // called again once it has succeeded: the implementation asserts + // that nothing is currently mapped. + bool Map(/*OUT*/ void** start, /*OUT*/ size_t* length, std::string fileName); + + protected: + // If we are currently holding a mapped object, these record the + // mapped address range. + void* mImage; + size_t mSize; + + // A logging sink, for complaining about mapping failures. + void (*mLog)(const char*); + + private: + // Are we currently holding a mapped object? This is private to + // the base class. Derived classes need to have their own way to + // track whether they are holding a mapped object. + bool mIsMapped; + + // Disable copying and assignment.
+ AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&); + AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&); + // Disable heap allocation of this class. + void* operator new(size_t); + void* operator new[](size_t); + void operator delete(void*); + void operator delete[](void*); +}; + +#endif // AutoObjectMapper_h diff --git a/tools/profiler/lul/LulCommon.cpp b/tools/profiler/lul/LulCommon.cpp new file mode 100644 index 0000000000..428f102c42 --- /dev/null +++ b/tools/profiler/lul/LulCommon.cpp @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2011, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/module.cc +// src/common/unique_string.cc + +// There's no internal-only interface for LulCommon. Hence include +// the external interface directly. +#include "LulCommonExt.h" + +#include +#include + +#include +#include + +namespace lul { + +using std::string; + +//////////////////////////////////////////////////////////////// +// Module +// +Module::Module(const string& name, const string& os, const string& architecture, + const string& id) + : name_(name), os_(os), architecture_(architecture), id_(id) {} + +Module::~Module() {} + +//////////////////////////////////////////////////////////////// +// UniqueString +// +class UniqueString { + public: + explicit UniqueString(string str) { str_ = strdup(str.c_str()); } + ~UniqueString() { free(reinterpret_cast(const_cast(str_))); } + const char* str_; +}; + +const char* FromUniqueString(const UniqueString* ustr) { return ustr->str_; } + +bool IsEmptyUniqueString(const UniqueString* ustr) { + return (ustr->str_)[0] == '\0'; +} + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// +UniqueStringUniverse::~UniqueStringUniverse() { + for (std::map::iterator it = map_.begin(); + it != map_.end(); it++) { + delete it->second; + } +} + +const UniqueString* UniqueStringUniverse::ToUniqueString(string str) { + 
std::map::iterator it = map_.find(str); + if (it == map_.end()) { + UniqueString* ustr = new UniqueString(str); + map_[str] = ustr; + return ustr; + } else { + return it->second; + } +} + +} // namespace lul diff --git a/tools/profiler/lul/LulCommonExt.h b/tools/profiler/lul/LulCommonExt.h new file mode 100644 index 0000000000..b20a7321ff --- /dev/null +++ b/tools/profiler/lul/LulCommonExt.h @@ -0,0 +1,509 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2010, 2012, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy + +// module.h: Define google_breakpad::Module. A Module holds debugging +// information, and can write that information out as a Breakpad +// symbol file. + +// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999. +// Copyright (c) 2001, 2002 Peter Dimov +// +// Permission to copy, use, modify, sell and distribute this software +// is granted provided this copyright notice appears in all copies. +// This software is provided "as is" without express or implied +// warranty, and with no claim as to its suitability for any purpose. +// +// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation. +// + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/unique_string.h +// src/common/scoped_ptr.h +// src/common/module.h + +// External interface for the "Common" component of LUL. + +#ifndef LulCommonExt_h +#define LulCommonExt_h + +#include +#include +#include + +#include +#include +#include +#include // for std::ptrdiff_t + +#include "mozilla/Assertions.h" + +namespace lul { + +using std::map; +using std::string; + +//////////////////////////////////////////////////////////////// +// UniqueString +// + +// Abstract type +class UniqueString; + +// Get the contained C string (debugging only) +const char* FromUniqueString(const UniqueString*); + +// Is the given string empty (that is, "") ? 
+bool IsEmptyUniqueString(const UniqueString*); + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// + +// All UniqueStrings live in some specific UniqueStringUniverse. +class UniqueStringUniverse { + public: + UniqueStringUniverse() {} + ~UniqueStringUniverse(); + // Convert a |string| to a UniqueString, that lives in this universe. + const UniqueString* ToUniqueString(string str); + + private: + map map_; +}; + +//////////////////////////////////////////////////////////////// +// GUID +// + +typedef struct { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MDGUID; // GUID + +typedef MDGUID GUID; + +//////////////////////////////////////////////////////////////// +// scoped_ptr +// + +// scoped_ptr mimics a built-in pointer except that it guarantees deletion +// of the object pointed to, either on destruction of the scoped_ptr or via +// an explicit reset(). scoped_ptr is a simple solution for simple needs; +// use shared_ptr or std::auto_ptr if your needs are more complex. + +// *** NOTE *** +// If your scoped_ptr is a class member of class FOO pointing to a +// forward declared type BAR (as shown below), then you MUST use a non-inlined +// version of the destructor. The destructor of a scoped_ptr (called from +// FOO's destructor) must have a complete definition of BAR in order to +// destroy it. Example: +// +// -- foo.h -- +// class BAR; +// +// class FOO { +// public: +// FOO(); +// ~FOO(); // Required for sources that instantiate class FOO to compile! +// +// private: +// scoped_ptr bar_; +// }; +// +// -- foo.cc -- +// #include "foo.h" +// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition. + +// scoped_ptr_malloc added by Google +// When one of these goes out of scope, instead of doing a delete or +// delete[], it calls free(). scoped_ptr_malloc is likely to see +// much more use than any other specializations. 
+ +// release() added by Google +// Use this to conditionally transfer ownership of a heap-allocated object +// to the caller, usually on method success. + +template +class scoped_ptr { + private: + T* ptr; + + scoped_ptr(scoped_ptr const&); + scoped_ptr& operator=(scoped_ptr const&); + + public: + typedef T element_type; + + explicit scoped_ptr(T* p = 0) : ptr(p) {} + + ~scoped_ptr() { delete ptr; } + + void reset(T* p = 0) { + if (ptr != p) { + delete ptr; + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_ptr& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_ptr should have its own object + template + bool operator==(scoped_ptr const& p) const; + template + bool operator!=(scoped_ptr const& p) const; +}; + +template +inline void swap(scoped_ptr& a, scoped_ptr& b) { + a.swap(b); +} + +template +inline bool operator==(T* p, const scoped_ptr& b) { + return p == b.get(); +} + +template +inline bool operator!=(T* p, const scoped_ptr& b) { + return p != b.get(); +} + +// scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to +// is guaranteed, either on destruction of the scoped_array or via an explicit +// reset(). Use shared_array or std::vector if your needs are more complex. 
+ +template +class scoped_array { + private: + T* ptr; + + scoped_array(scoped_array const&); + scoped_array& operator=(scoped_array const&); + + public: + typedef T element_type; + + explicit scoped_array(T* p = 0) : ptr(p) {} + + ~scoped_array() { delete[] ptr; } + + void reset(T* p = 0) { + if (ptr != p) { + delete[] ptr; + ptr = p; + } + } + + T& operator[](std::ptrdiff_t i) const { + MOZ_ASSERT(ptr != 0); + MOZ_ASSERT(i >= 0); + return ptr[i]; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_array& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_array should have its own object + template + bool operator==(scoped_array const& p) const; + template + bool operator!=(scoped_array const& p) const; +}; + +template +inline void swap(scoped_array& a, scoped_array& b) { + a.swap(b); +} + +template +inline bool operator==(T* p, const scoped_array& b) { + return p == b.get(); +} + +template +inline bool operator!=(T* p, const scoped_array& b) { + return p != b.get(); +} + +// This class wraps the c library function free() in a class that can be +// passed as a template argument to scoped_ptr_malloc below. +class ScopedPtrMallocFree { + public: + inline void operator()(void* x) const { free(x); } +}; + +// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a +// second template argument, the functor used to free the object. 
+ +template +class scoped_ptr_malloc { + private: + T* ptr; + + scoped_ptr_malloc(scoped_ptr_malloc const&); + scoped_ptr_malloc& operator=(scoped_ptr_malloc const&); + + public: + typedef T element_type; + + explicit scoped_ptr_malloc(T* p = 0) : ptr(p) {} + + ~scoped_ptr_malloc() { free_((void*)ptr); } + + void reset(T* p = 0) { + if (ptr != p) { + free_((void*)ptr); + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_ptr_malloc& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_ptr_malloc should have its own object + template + bool operator==(scoped_ptr_malloc const& p) const; + template + bool operator!=(scoped_ptr_malloc const& p) const; + + static FreeProc const free_; +}; + +template +FP const scoped_ptr_malloc::free_ = FP(); + +template +inline void swap(scoped_ptr_malloc& a, scoped_ptr_malloc& b) { + a.swap(b); +} + +template +inline bool operator==(T* p, const scoped_ptr_malloc& b) { + return p == b.get(); +} + +template +inline bool operator!=(T* p, const scoped_ptr_malloc& b) { + return p != b.get(); +} + +//////////////////////////////////////////////////////////////// +// Module +// + +// A Module represents the contents of a module, and supports methods +// for adding information produced by parsing STABS or DWARF data +// --- possibly both from the same file --- and then writing out the +// unified contents as a Breakpad-format symbol file. +class Module { + public: + // The type of addresses and sizes in a symbol table. + typedef uint64_t Address; + + // Representation of an expression. 
This can either be a postfix + // expression, in which case it is stored as a string, or a simple + // expression of the form (identifier + imm) or *(identifier + imm). + // It can also be invalid (denoting "no value"). + enum ExprHow { kExprInvalid = 1, kExprPostfix, kExprSimple, kExprSimpleMem }; + + struct Expr { + // Construct a simple-form expression: (ident + offset) when |deref| is + // false, *(ident + offset) when it is true. An empty |ident| yields an + // invalid expression, the same state that Expr() produces. + // + // Every member starts out in the invalid state and is only overwritten for + // a non-empty |ident|. (A bare |Expr();| statement in the body would just + // build and discard a temporary, leaving ident_, offset_ and how_ + // uninitialised.) + Expr(const UniqueString* ident, long offset, bool deref) + : ident_(nullptr), offset_(0), postfix_(""), how_(kExprInvalid) { + if (!IsEmptyUniqueString(ident)) { + ident_ = ident; + offset_ = offset; + how_ = deref ? kExprSimpleMem : kExprSimple; + } + } + + // Construct an invalid expression + Expr() : ident_(nullptr), offset_(0), postfix_(""), how_(kExprInvalid) {} + + // Return the postfix expression string, either directly, + // if this is a postfix expression, or by synthesising it + // for a simple expression: the identifier, the magnitude of the + // offset, the offset's sign as an operator, and a trailing " ^" for + // the dereferencing form. Asserts (and returns a diagnostic string) + // when called on an invalid expression. + std::string getExprPostfix() const { + switch (how_) { + case kExprPostfix: + return postfix_; + case kExprSimple: + case kExprSimpleMem: { + // Take the magnitude in unsigned arithmetic; labs(LONG_MIN) would + // overflow. + const unsigned long magnitude = + offset_ < 0 ? 0UL - static_cast<unsigned long>(offset_) + : static_cast<unsigned long>(offset_); + char buf[40]; + snprintf(buf, sizeof(buf), " %lu %c%s", magnitude, + offset_ < 0 ? '-' : '+', how_ == kExprSimple ? "" : " ^"); + return std::string(FromUniqueString(ident_)) + std::string(buf); + } + case kExprInvalid: + default: + MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type"); + return "Expr::genExprPostfix: kExprInvalid"; + } + } + + // The identifier that gives the starting value for simple expressions. + const UniqueString* ident_; + // The offset to add for simple expressions. + long offset_; + // The Postfix expression string to evaluate for non-simple expressions. + std::string postfix_; + // The operation expressed by this expression. + ExprHow how_; + }; + + // A map from register names to expressions that recover + // their values. This can represent a complete set of rules to + // follow at some address, or a set of changes to be applied to an + // extant set of rules. + // NOTE! there are two completely different types called RuleMap.
This + // is one of them. + typedef std::map RuleMap; + + // A map from addresses to RuleMaps, representing changes that take + // effect at given addresses. + typedef std::map RuleChangeMap; + + // A range of 'STACK CFI' stack walking information. An instance of + // this structure corresponds to a 'STACK CFI INIT' record and the + // subsequent 'STACK CFI' records that fall within its range. + struct StackFrameEntry { + // The starting address and number of bytes of machine code this + // entry covers. + Address address, size; + + // The initial register recovery rules, in force at the starting + // address. + RuleMap initial_rules; + + // A map from addresses to rule changes. To find the rules in + // force at a given address, start with initial_rules, and then + // apply the changes given in this map for all addresses up to and + // including the address you're interested in. + RuleChangeMap rule_changes; + }; + + // Create a new module with the given name, operating system, + // architecture, and ID string. + Module(const std::string& name, const std::string& os, + const std::string& architecture, const std::string& id); + ~Module(); + + private: + // Module header entries. + std::string name_, os_, architecture_, id_; +}; + +} // namespace lul + +#endif // LulCommonExt_h diff --git a/tools/profiler/lul/LulDwarf.cpp b/tools/profiler/lul/LulDwarf.cpp new file mode 100644 index 0000000000..ea38ce50ea --- /dev/null +++ b/tools/profiler/lul/LulDwarf.cpp @@ -0,0 +1,2538 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy +// Original author: Jim Blandy + +// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, +// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. 
+ +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/bytereader.cc +// src/common/dwarf/dwarf2reader.cc +// src/common/dwarf_cfi_to_module.cc + +#include +#include +#include +#include + +#include +#include + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/Sprintf.h" +#include "mozilla/Vector.h" + +#include "LulCommonExt.h" +#include "LulDwarfInt.h" + +// Set this to 1 for verbose logging +#define DEBUG_DWARF 0 + +namespace lul { + +using std::pair; +using std::string; + +ByteReader::ByteReader(enum Endianness endian) + : offset_reader_(NULL), + address_reader_(NULL), + endian_(endian), + address_size_(0), + offset_size_(0), + have_section_base_(), + have_text_base_(), + have_data_base_(), + have_function_base_() {} + +ByteReader::~ByteReader() {} + +void ByteReader::SetOffsetSize(uint8 size) { + offset_size_ = size; + MOZ_ASSERT(size == 4 || size == 8); + if (size == 4) { + this->offset_reader_ = &ByteReader::ReadFourBytes; + } else { + this->offset_reader_ = &ByteReader::ReadEightBytes; + } +} + +void ByteReader::SetAddressSize(uint8 size) { + address_size_ = size; + MOZ_ASSERT(size == 4 || size == 8); + if (size == 4) { + this->address_reader_ = &ByteReader::ReadFourBytes; + } else { + this->address_reader_ = &ByteReader::ReadEightBytes; + } +} + +uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) { + const uint64 initial_length = ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. 
+ if (initial_length == 0xffffffff) { + SetOffsetSize(8); + *len = 12; + return ReadOffset(start); + } else { + SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const { + if (encoding == DW_EH_PE_omit) return true; + if (encoding == DW_EH_PE_aligned) return true; + if ((encoding & 0x7) > DW_EH_PE_udata8) return false; + if ((encoding & 0x70) > DW_EH_PE_funcrel) return false; + return true; +} + +bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + return true; + case DW_EH_PE_pcrel: + return have_section_base_; + case DW_EH_PE_textrel: + return have_text_base_; + case DW_EH_PE_datarel: + return have_data_base_; + case DW_EH_PE_funcrel: + return have_function_base_; + default: + return false; + } +} + +uint64 ByteReader::ReadEncodedPointer(const char* buffer, + DwarfPointerEncoding encoding, + size_t* len) const { + // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't + // see it here. + MOZ_ASSERT(encoding != DW_EH_PE_omit); + + // The Linux Standards Base 4.0 does not make this clear, but the + // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c) + // agree that aligned pointers are always absolute, machine-sized, + // machine-signed pointers. + if (encoding == DW_EH_PE_aligned) { + MOZ_ASSERT(have_section_base_); + + // We don't need to align BUFFER in *our* address space. Rather, we + // need to find the next position in our buffer that would be aligned + // when the .eh_frame section the buffer contains is loaded into the + // program's memory. So align assuming that buffer_base_ gets loaded at + // address section_base_, where section_base_ itself may or may not be + // aligned. + + // First, find the offset to START from the closest prior aligned + // address. + uint64 skew = section_base_ & (AddressSize() - 1); + // Now find the offset from that aligned address to buffer. 
+ uint64 offset = skew + (buffer - buffer_base_); + // Round up to the next boundary. + uint64 aligned = (offset + AddressSize() - 1) & -AddressSize(); + // Convert back to a pointer. + const char* aligned_buffer = buffer_base_ + (aligned - skew); + // Finally, store the length and actually fetch the pointer. + *len = aligned_buffer - buffer + AddressSize(); + return ReadAddress(aligned_buffer); + } + + // Extract the value first, ignoring whether it's a pointer or an + // offset relative to some base. + uint64 offset; + switch (encoding & 0x0f) { + case DW_EH_PE_absptr: + // DW_EH_PE_absptr is weird, as it is used as a meaningful value for + // both the high and low nybble of encoding bytes. When it appears in + // the high nybble, it means that the pointer is absolute, not an + // offset from some base address. When it appears in the low nybble, + // as here, it means that the pointer is stored as a normal + // machine-sized and machine-signed address. A low nybble of + // DW_EH_PE_absptr does not imply that the pointer is absolute; it is + // correct for us to treat the value as an offset from a base address + // if the upper nybble is not DW_EH_PE_absptr. + offset = ReadAddress(buffer); + *len = AddressSize(); + break; + + case DW_EH_PE_uleb128: + offset = ReadUnsignedLEB128(buffer, len); + break; + + case DW_EH_PE_udata2: + offset = ReadTwoBytes(buffer); + *len = 2; + break; + + case DW_EH_PE_udata4: + offset = ReadFourBytes(buffer); + *len = 4; + break; + + case DW_EH_PE_udata8: + offset = ReadEightBytes(buffer); + *len = 8; + break; + + case DW_EH_PE_sleb128: + offset = ReadSignedLEB128(buffer, len); + break; + + case DW_EH_PE_sdata2: + offset = ReadTwoBytes(buffer); + // Sign-extend from 16 bits. + offset = (offset ^ 0x8000) - 0x8000; + *len = 2; + break; + + case DW_EH_PE_sdata4: + offset = ReadFourBytes(buffer); + // Sign-extend from 32 bits. 
+ offset = (offset ^ 0x80000000ULL) - 0x80000000ULL; + *len = 4; + break; + + case DW_EH_PE_sdata8: + // No need to sign-extend; this is the full width of our type. + offset = ReadEightBytes(buffer); + *len = 8; + break; + + default: + abort(); + } + + // Find the appropriate base address. + uint64 base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + + case DW_EH_PE_pcrel: + MOZ_ASSERT(have_section_base_); + base = section_base_ + (buffer - buffer_base_); + break; + + case DW_EH_PE_textrel: + MOZ_ASSERT(have_text_base_); + base = text_base_; + break; + + case DW_EH_PE_datarel: + MOZ_ASSERT(have_data_base_); + base = data_base_; + break; + + case DW_EH_PE_funcrel: + MOZ_ASSERT(have_function_base_); + base = function_base_; + break; + + default: + abort(); + } + + uint64 pointer = base + offset; + + // Remove inappropriate upper bits. + if (AddressSize() == 4) + pointer = pointer & 0xffffffff; + else + MOZ_ASSERT(AddressSize() == sizeof(uint64)); + + return pointer; +} + +// A DWARF rule for recovering the address or value of a register, or +// computing the canonical frame address. This is an 8-way sum-of-products +// type. Excluding the INVALID variant, there is one subclass of this for +// each '*Rule' member function in CallFrameInfo::Handler. +// +// This could logically be nested within State, but then the qualified names +// get horrendous. + +class CallFrameInfo::Rule final { + public: + enum Tag { + INVALID, + Undefined, + SameValue, + Offset, + ValOffset, + Register, + Expression, + ValExpression + }; + + private: + // tag_ (below) indicates the form of the expression. There are 7 forms + // plus INVALID. All non-INVALID expressions denote a machine-word-sized + // value at unwind time. The description below assumes the presence of, at + // unwind time: + // + // * a function R, which takes a Dwarf register number and returns its value + // in the callee frame (the one we are unwinding out of). 
+ // + // * a function EvalDwarfExpr, which evaluates a Dwarf expression. + // + // Register numbers are encoded using the target ABI's Dwarf + // register-numbering conventions. Except where otherwise noted, a register + // value may also be the special value CallFrameInfo::Handler::kCFARegister + // ("the CFA"). + // + // The expression forms are represented using tag_, word1_ and word2_. The + // forms and denoted values are as follows: + // + // * INVALID: not a valid expression. + // valid fields: (none) + // denotes: no value + // + // * Undefined: denotes no value. This is used for a register whose value + // cannot be recovered. + // valid fields: (none) + // denotes: no value + // + // * SameValue: the register's value is the same as in the callee. + // valid fields: (none) + // denotes: R(the register that this Rule is associated with, + // not stored here) + // + // * Offset: the register's value is in memory at word2_ bytes away from + // Dwarf register number word1_. word2_ is interpreted as a *signed* + // offset. + // valid fields: word1_=DwarfReg, word2=Offset + // denotes: *(R(word1_) + word2_) + // + // * ValOffset: same as Offset, without the dereference. + // valid fields: word1_=DwarfReg, word2=Offset + // denotes: R(word1_) + word2_ + // + // * Register: the register's value is in some other register, + // which may not be the CFA. + // valid fields: word1_=DwarfReg + // denotes: R(word1_) + // + // * Expression: the register's value is in memory at a location that can be + // computed from the Dwarf expression contained in the word2_ bytes + // starting at word1_. Note these locations are into the area of the .so + // temporarily mmaped info for debuginfo reading and have no validity once + // debuginfo reading has finished. + // valid fields: ExprStart=word1_, ExprLen=word2_ + // denotes: *(EvalDwarfExpr(word1_, word2_)) + // + // * ValExpression: same as Expression, without the dereference. 
+ // valid fields: ExprStart=word1_, ExprLen=word2_ + // denotes: EvalDwarfExpr(word1_, word2_) + // + + // 3 words (or less) for representation. Unused word1_/word2_ fields must + // be set to zero. + Tag tag_; + uintptr_t word1_; + uintptr_t word2_; + + // To ensure that word1_ can hold a pointer to an expression string. + static_assert(sizeof(const char*) <= sizeof(word1_)); + // To ensure that word2_ can hold any string length or memory offset. + static_assert(sizeof(size_t) <= sizeof(word2_)); + + // This class denotes an 8-way sum-of-product type, and accessing invalid + // fields is meaningless. The accessors and constructors below enforce + // that. + bool isCanonical() const { + switch (tag_) { + case Tag::INVALID: + case Tag::Undefined: + case Tag::SameValue: + return word1_ == 0 && word2_ == 0; + case Tag::Offset: + case Tag::ValOffset: + return true; + case Tag::Register: + return word2_ == 0; + case Tag::Expression: + case Tag::ValExpression: + return true; + default: + MOZ_CRASH(); + } + } + + public: + Tag tag() const { return tag_; } + int dwreg() const { + switch (tag_) { + case Tag::Offset: + case Tag::ValOffset: + case Tag::Register: + return (int)word1_; + default: + MOZ_CRASH(); + } + } + intptr_t offset() const { + switch (tag_) { + case Tag::Offset: + case Tag::ValOffset: + return (intptr_t)word2_; + default: + MOZ_CRASH(); + } + } + ImageSlice expr() const { + switch (tag_) { + case Tag::Expression: + case Tag::ValExpression: + return ImageSlice((const char*)word1_, (size_t)word2_); + default: + MOZ_CRASH(); + } + } + + // Constructor-y stuff + Rule() { + tag_ = Tag::INVALID; + word1_ = 0; + word2_ = 0; + } + + static Rule mkINVALID() { + Rule r; // is initialised by Rule() + return r; + } + static Rule mkUndefinedRule() { + Rule r; + r.tag_ = Tag::Undefined; + r.word1_ = 0; + r.word2_ = 0; + return r; + } + static Rule mkSameValueRule() { + Rule r; + r.tag_ = Tag::SameValue; + r.word1_ = 0; + r.word2_ = 0; + return r; + } + static Rule 
mkOffsetRule(int dwreg, intptr_t offset) { + Rule r; + r.tag_ = Tag::Offset; + r.word1_ = (uintptr_t)dwreg; + r.word2_ = (uintptr_t)offset; + return r; + } + static Rule mkValOffsetRule(int dwreg, intptr_t offset) { + Rule r; + r.tag_ = Tag::ValOffset; + r.word1_ = (uintptr_t)dwreg; + r.word2_ = (uintptr_t)offset; + return r; + } + static Rule mkRegisterRule(int dwreg) { + Rule r; + r.tag_ = Tag::Register; + r.word1_ = (uintptr_t)dwreg; + r.word2_ = 0; + return r; + } + static Rule mkExpressionRule(ImageSlice expr) { + Rule r; + r.tag_ = Tag::Expression; + r.word1_ = (uintptr_t)expr.start_; + r.word2_ = (uintptr_t)expr.length_; + return r; + } + static Rule mkValExpressionRule(ImageSlice expr) { + Rule r; + r.tag_ = Tag::ValExpression; + r.word1_ = (uintptr_t)expr.start_; + r.word2_ = (uintptr_t)expr.length_; + return r; + } + + // Misc + inline bool isVALID() const { return tag_ != Tag::INVALID; } + + bool operator==(const Rule& rhs) const { + MOZ_ASSERT(isVALID() && rhs.isVALID()); + MOZ_ASSERT(isCanonical()); + MOZ_ASSERT(rhs.isCanonical()); + if (tag_ != rhs.tag_) { + return false; + } + switch (tag_) { + case Tag::INVALID: + MOZ_CRASH(); + case Tag::Undefined: + case Tag::SameValue: + return true; + case Tag::Offset: + case Tag::ValOffset: + return word1_ == rhs.word1_ && word2_ == rhs.word2_; + case Tag::Register: + return word1_ == rhs.word1_; + case Tag::Expression: + case Tag::ValExpression: + return expr() == rhs.expr(); + default: + MOZ_CRASH(); + } + } + + bool operator!=(const Rule& rhs) const { return !(*this == rhs); } + + // Tell HANDLER that, at ADDRESS in the program, REG can be + // recovered using this rule. If REG is kCFARegister, then this rule + // describes how to compute the canonical frame address. Return what the + // HANDLER member function returned. 
+ bool Handle(Handler* handler, uint64 address, int reg) const { + MOZ_ASSERT(isVALID()); + MOZ_ASSERT(isCanonical()); + switch (tag_) { + case Tag::Undefined: + return handler->UndefinedRule(address, reg); + case Tag::SameValue: + return handler->SameValueRule(address, reg); + case Tag::Offset: + return handler->OffsetRule(address, reg, word1_, word2_); + case Tag::ValOffset: + return handler->ValOffsetRule(address, reg, word1_, word2_); + case Tag::Register: + return handler->RegisterRule(address, reg, word1_); + case Tag::Expression: + return handler->ExpressionRule( + address, reg, ImageSlice((const char*)word1_, (size_t)word2_)); + case Tag::ValExpression: + return handler->ValExpressionRule( + address, reg, ImageSlice((const char*)word1_, (size_t)word2_)); + default: + MOZ_CRASH(); + } + } + + void SetBaseRegister(unsigned reg) { + MOZ_ASSERT(isVALID()); + MOZ_ASSERT(isCanonical()); + switch (tag_) { + case Tag::ValOffset: + word1_ = reg; + break; + case Tag::Offset: + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. 
+ // (fallthrough) + case Tag::Undefined: + case Tag::SameValue: + case Tag::Register: + case Tag::Expression: + case Tag::ValExpression: + // Do nothing + break; + default: + MOZ_CRASH(); + } + } + + void SetOffset(long long offset) { + MOZ_ASSERT(isVALID()); + MOZ_ASSERT(isCanonical()); + switch (tag_) { + case Tag::ValOffset: + word2_ = offset; + break; + case Tag::Offset: + // Same comment as in SetBaseRegister applies + // (fallthrough) + case Tag::Undefined: + case Tag::SameValue: + case Tag::Register: + case Tag::Expression: + case Tag::ValExpression: + // Do nothing + break; + default: + MOZ_CRASH(); + } + } + + // For debugging only + string show() const { + char buf[100]; + string s = ""; + switch (tag_) { + case Tag::INVALID: + s = "INVALID"; + break; + case Tag::Undefined: + s = "Undefined"; + break; + case Tag::SameValue: + s = "SameValue"; + break; + case Tag::Offset: + s = "Offset{..}"; + break; + case Tag::ValOffset: + sprintf(buf, "ValOffset{reg=%d offs=%lld}", (int)word1_, + (long long int)word2_); + s = string(buf); + break; + case Tag::Register: + s = "Register{..}"; + break; + case Tag::Expression: + s = "Expression{..}"; + break; + case Tag::ValExpression: + s = "ValExpression{..}"; + break; + default: + MOZ_CRASH(); + } + return s; + } +}; + +// `RuleMapLowLevel` is a simple class that maps from `int` (register numbers) +// to `Rule`. This is implemented as a vector of `` pairs, with a +// 12-element inline capacity. From a big-O perspective this is obviously a +// terrible way to implement an associative map. This workload is however +// quite special in that the maximum number of elements is normally 7 (on +// x86_64-linux), and so this implementation is much faster than one based on +// std::map with its attendant R-B-tree node allocation and balancing +// overheads. +// +// An iterator that enumerates the mapping in increasing order of the `int` +// keys is provided. 
This ordered iteration facility is required by +// CallFrameInfo::RuleMap::HandleTransitionTo, which needs to iterate through +// two such maps simultaneously and in-order so as to compare them. + +// All `Rule`s in the map must satisfy `isVALID()`. That conveniently means +// that `Rule::mkINVALID()` can be used to indicate "not found` in `get()`. + +class CallFrameInfo::RuleMapLowLevel { + using Entry = pair; + + // The inline capacity of 12 is carefully chosen. It would be wise to make + // careful measurements of time, instruction count, allocation count and + // allocated bytes before changing it. For x86_64-linux, a value of 8 is + // marginally better; using 12 increases the total heap bytes allocated by + // around 20%. For arm64-linux, a value of 24 is better; using 12 increases + // the total blocks allocated by around 20%. But it's a not bad tradeoff + // for both targets, and in any case is vastly superior to the previous + // scheme of using `std::map`. + mozilla::Vector entries_; + + public: + void clear() { entries_.clear(); } + + RuleMapLowLevel() { clear(); } + + RuleMapLowLevel& operator=(const RuleMapLowLevel& rhs) { + entries_.clear(); + for (size_t i = 0; i < rhs.entries_.length(); i++) { + bool ok = entries_.append(rhs.entries_[i]); + MOZ_RELEASE_ASSERT(ok); + } + return *this; + } + + void set(int reg, Rule rule) { + MOZ_ASSERT(rule.isVALID()); + // Find the place where it should go, if any + size_t i = 0; + size_t nEnt = entries_.length(); + while (i < nEnt && entries_[i].first < reg) { + i++; + } + if (i == nEnt) { + // No entry exists, and all the existing ones are for lower register + // numbers. So just add it at the end. + bool ok = entries_.append(Entry(reg, rule)); + MOZ_RELEASE_ASSERT(ok); + } else { + // It needs to live at location `i`, and .. + MOZ_ASSERT(i < nEnt); + if (entries_[i].first == reg) { + // .. there's already an old entry, so just update it. + entries_[i].second = rule; + } else { + // .. 
there's no previous entry, so shift `i` and all those following + // it one place to the right, and put the new entry at `i`. Doing it + // manually is measurably cheaper than using `Vector::insert`. + MOZ_ASSERT(entries_[i].first > reg); + bool ok = entries_.append(Entry(999999, Rule::mkINVALID())); + MOZ_RELEASE_ASSERT(ok); + for (size_t j = nEnt; j >= i + 1; j--) { + entries_[j] = entries_[j - 1]; + } + entries_[i] = Entry(reg, rule); + } + } + // Check in-order-ness and validity. + for (size_t i = 0; i < entries_.length(); i++) { + MOZ_ASSERT(entries_[i].second.isVALID()); + MOZ_ASSERT_IF(i > 0, entries_[i - 1].first < entries_[i].first); + } + MOZ_ASSERT(get(reg).isVALID()); + } + + // Find the entry for `reg`, or return `Rule::mkINVALID()` if not found. + Rule get(int reg) const { + size_t nEnt = entries_.length(); + // "early exit" in the case where `entries_[i].first > reg` was tested on + // x86_64 and found to be slightly slower than just testing all entries, + // presumably because the reduced amount of searching was not offset by + // the cost of an extra test per iteration. + for (size_t i = 0; i < nEnt; i++) { + if (entries_[i].first == reg) { + CallFrameInfo::Rule ret = entries_[i].second; + MOZ_ASSERT(ret.isVALID()); + return ret; + } + } + return CallFrameInfo::Rule::mkINVALID(); + } + + // A very simple in-order iteration facility. + class Iter { + const RuleMapLowLevel* rmll_; + size_t nextIx_; + + public: + explicit Iter(const RuleMapLowLevel* rmll) : rmll_(rmll), nextIx_(0) {} + bool avail() const { return nextIx_ < rmll_->entries_.length(); } + bool finished() const { return !avail(); } + // Move the iterator to the next entry. + void step() { + MOZ_RELEASE_ASSERT(nextIx_ < rmll_->entries_.length()); + nextIx_++; + } + // Get the value at the current iteration point, but don't advance to the + // next entry. 
+ pair peek() { + MOZ_RELEASE_ASSERT(nextIx_ < rmll_->entries_.length()); + return rmll_->entries_[nextIx_]; + } + }; +}; + +// A map from register numbers to rules. This is a wrapper around +// `RuleMapLowLevel`, with added logic for dealing with the "special" CFA +// rule, and with `HandleTransitionTo`, which effectively computes the +// difference between two `RuleMaps`. + +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(Rule::mkINVALID()) {} + RuleMap(const RuleMap& rhs) : cfa_rule_(Rule::mkINVALID()) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap& operator=(const RuleMap& rhs); + + // Set the rule for computing the CFA to RULE. + void SetCFARule(Rule rule) { cfa_rule_ = rule; } + + // Return the current CFA rule. Be careful not to modify it -- it's returned + // by value. If you want to modify the CFA rule, use CFARuleRef() instead. + // We use these two for DW_CFA_def_cfa_offset and DW_CFA_def_cfa_register, + // and for detecting references to the CFA before a rule for it has been + // established. + Rule CFARule() const { return cfa_rule_; } + Rule* CFARuleRef() { return &cfa_rule_; } + + // Return the rule for REG, or the INVALID rule if there is none. + Rule RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. + void SetRegisterRule(int reg, Rule rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler* handler, uint64 address, + const RuleMap& new_rules) const; + + private: + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. + Rule cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. 
+ RuleMapLowLevel registers_; +}; + +CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) { + Clear(); + if (rhs.cfa_rule_.isVALID()) cfa_rule_ = rhs.cfa_rule_; + registers_ = rhs.registers_; + return *this; +} + +CallFrameInfo::Rule CallFrameInfo::RuleMap::RegisterRule(int reg) const { + MOZ_ASSERT(reg != Handler::kCFARegister); + return registers_.get(reg); +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule rule) { + MOZ_ASSERT(reg != Handler::kCFARegister); + MOZ_ASSERT(rule.isVALID()); + registers_.set(reg, rule); +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler* handler, uint64 address, const RuleMap& new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_.isVALID() && new_rules.cfa_rule_.isVALID()) { + if (cfa_rule_ != new_rules.cfa_rule_ && + !new_rules.cfa_rule_.Handle(handler, address, Handler::kCFARegister)) { + return false; + } + } else if (cfa_rule_.isVALID()) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_.isVALID()) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleMapLowLevel::Iter old_it(®isters_); + RuleMapLowLevel::Iter new_it(&new_rules.registers_); + while (!old_it.finished() && !new_it.finished()) { + pair old_pair = old_it.peek(); + pair new_pair = new_it.peek(); + if (old_pair.first < new_pair.first) { + // This RuleMap has an entry for old.first, but NEW_RULES doesn't. 
+ // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_pair.first)) { + return false; + } + old_it.step(); + } else if (old_pair.first > new_pair.first) { + // NEW_RULES has an entry for new_pair.first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (old_pair.second != new_pair.second && + !new_pair.second.Handle(handler, address, new_pair.first)) { + return false; + } + new_it.step(); + old_it.step(); + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (!old_it.finished()) { + pair old_pair = old_it.peek(); + if (!handler->SameValueRule(address, old_pair.first)) return false; + old_it.step(); + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + MOZ_ASSERT(new_it.finished()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + cfa_rule_ = Rule::mkINVALID(); + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. 
+ State(ByteReader* reader, Handler* handler, Reporter* reporter, + uint64 address) + : reader_(reader), + handler_(handler), + reporter_(reporter), + address_(address), + entry_(NULL), + cursor_(NULL), + saved_rules_(NULL) {} + + ~State() { + if (saved_rules_) delete saved_rules_; + } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE& cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE& fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64 offset; // An offset or address. + long signed_offset; // A signed offset. + ImageSlice expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. + // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) 
+ // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char* format, Operands* operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + MOZ_ALWAYS_INLINE bool DoInstruction(); + + // Repeatedly call `DoInstruction`, until either: + // * it returns `false`, which indicates some kind of failure, + // in which case return `false` from here too, or + // * we've run out of instructions (that is, `cursor_ >= entry_->end`), + // in which case return `true`. + // This is marked as never-inline because it is the only place that + // `DoInstruction` is called from, and we want to maximise the chances that + // `DoInstruction` is inlined into this routine. + MOZ_NEVER_INLINE bool DoInstructions(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. 
(Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. (Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. + Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // The code address to which the next instruction in the stream applies. + uint64 address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry* entry_; + + // The next instruction to process. + const char* cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. 
+ std::stack* saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE& cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + if (!DoInstructions()) { + return false; + } + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. + cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE& fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + return DoInstructions(); +} + +bool CallFrameInfo::State::ParseOperands(const char* format, + Operands* operands) { + size_t len; + const char* operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = reader_->ReadEncodedPointer( + cursor_, entry_->cie->pointer_encoding, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = 
reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = ImageSlice(cursor_, expression_length); + cursor_ += expression_length; + break; + } + + default: + MOZ_ASSERT(0); + } + } + + return true; +} + +MOZ_ALWAYS_INLINE +bool CallFrameInfo::State::DoInstruction() { + CIE* cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + MOZ_ASSERT(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + MOZ_ASSERT(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. + case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + MOZ_ASSERT(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. 
+ case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + Rule* cfa_rule = rules_.CFARuleRef(); + if (!cfa_rule->isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + if (!ParseOperands("r", &ops)) return false; + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister)) + return false; + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) return false; + Rule rule = Rule::mkValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule.Handle(handler_, address_, Handler::kCFARegister)) return false; + break; + } + + // The register's value cannot be recovered. 
+ case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, Rule::mkUndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. + case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, Rule::mkSameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, Rule::mkRegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. 
+ case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, Rule::mkExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. + case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, + Rule::mkValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || !DoRestore(ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + if (!saved_rules_) { + saved_rules_ = new std::stack(); + } + saved_rules_->push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (!saved_rules_ || saved_rules_->empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap& new_rules = saved_rules_->top(); + if (rules_.CFARule().isVALID() && !new_rules.CFARule().isVALID()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_->pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, Rule::mkRegisterRule(i + 16))) return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. 
+ for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. + if (!DoRule(i, Rule::mkOffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +// See declaration above for rationale re the no-inline directive. +MOZ_NEVER_INLINE +bool CallFrameInfo::State::DoInstructions() { + while (cursor_ < entry_->end) { + if (!DoInstruction()) { + return false; + } + } + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule rule = Rule::mkValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule.Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule* cfa_rule = rules_.CFARuleRef(); + if (!cfa_rule->isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule rule) { + rules_.SetRegisterRule(reg, rule); + return rule.Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule().isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + Rule rule = Rule::mkOffsetRule(Handler::kCFARegister, offset); + return DoRule(reg, rule); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule().isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, 
CursorOffset()); + return false; + } + return DoRule(reg, Rule::mkValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. + if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule rule = cie_rules_.RegisterRule(reg); + if (!rule.isVALID()) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = Rule::mkSameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const char* cursor, Entry* entry) { + const char* buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64 length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. + if (length > size_t(buffer_end - cursor)) return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. 
+ size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + MOZ_ASSERT(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE* cie) { + const char* cursor = cie->fields; + size_t len; + + MOZ_ASSERT(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. 
+ if (cie->end - cursor < 1) return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 4) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const char* augmentation_start = cursor; + const void* augmentation_end = + memchr(augmentation_start, '\0', cie->end - augmentation_start); + if (!augmentation_end) return ReportIncomplete(cie); + cursor = static_cast(augmentation_end); + cie->augmentation = string(augmentation_start, cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + if (cie->version >= 4) { + // Check that the address_size and segment_size fields are plausible. + if (cie->end - cursor < 2) { + return ReportIncomplete(cie); + } + uint8_t address_size = reader_->ReadOneByte(cursor); + cursor++; + if (address_size != sizeof(void*)) { + // This is not per-se invalid CFI. But we can reasonably expect to + // be running on a target of the same word size as the CFI is for, + // so we reject this case. 
+ reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid address_size"); + return false; + } + uint8_t segment_size = reader_->ReadOneByte(cursor); + cursor++; + if (segment_size != 0) { + // This is also not per-se invalid CFI, but we don't currently handle + // the case of non-zero |segment_size|. + reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid segment_size"); + return false; + } + // We only continue parsing if |segment_size| is zero. If this routine + // is ever changed to allow non-zero |segment_size|, then + // ReadFDEFields() below will have to be changed to match, per comments + // there. + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. 
+ if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const char* data = cursor; + cursor += data_size; + const char* data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. 
+ if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = reader_->ReadEncodedPointer( + data, cie->personality_encoding, &len); + if (len > size_t(data_end - data)) return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE* fde) { + const char* cursor = fde->fields; + size_t size; + + // At this point, for Dwarf 4 and above, we are assuming that the + // associated CIE has its |segment_size| field equal to zero. This is + // checked for in ReadCIEFields() above. 
If ReadCIEFields() is ever + // changed to allow non-zero |segment_size| CIEs then we will have to read + // the segment_selector value at this point. + + fde->address = + reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. 
+ if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const char* buffer_end = buffer_ + buffer_length_; + const char* cursor; + bool all_ok = true; + const char* entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. 
+ if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) continue; + // This had better be an actual CIE. + if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) continue; + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, cie.version, + cie.augmentation, cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_->PersonalityRoutine( + cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_->LanguageSpecificDataArea( + fde.lsda_address, IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. 
+ handler_->End(); + } + + return all_ok; +} + +const char* CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + MOZ_ASSERT(kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry* entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64 offset, + CallFrameInfo::EntryKind kind) { + char buf[300]; + SprintfLiteral(buf, "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, + uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer is out of range: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer does not point to a CIE: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, 
section_.c_str(), version); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, + const string& aug) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::InvalidDwarf4Artefact(uint64 offset, + const char* what) { + char* what_safe = strndup(what, 100); + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies invalid Dwarf4 artefact: %s\n", + filename_.c_str(), offset, section_.c_str(), what_safe); + log_(buf); + free(what_safe); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: " + "0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " the DW_CFA_restore instruction at offset 0x%llx" + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s 
at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx assumes that a CFA rule " + "has been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " should pop a saved state from the stack, but the stack " + "is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +unsigned int DwarfCFIToModule::RegisterNames::I386() { + /* + 8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi", + 3 "$eip", "$eflags", "$unused1", + 8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + 2 "$unused2", "$unused3", + 8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + 8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + 3 "$fcw", "$fsw", "$mxcsr", + 8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", 
"$unused5", + 2 "$tr", "$ldtr" + */ + return 8 + 3 + 8 + 2 + 8 + 8 + 3 + 8 + 2; +} + +unsigned int DwarfCFIToModule::RegisterNames::X86_64() { + /* + 8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp", + 8 "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", + 1 "$rip", + 8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + 8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15", + 8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + 8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + 1 "$rflags", + 8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2", + 4 "$fs.base", "$gs.base", "$unused3", "$unused4", + 2 "$tr", "$ldtr", + 3 "$mxcsr", "$fcw", "$fsw" + */ + return 8 + 8 + 1 + 8 + 8 + 8 + 8 + 1 + 8 + 4 + 2 + 3; +} + +// Per ARM IHI 0040A, section 3.1 +unsigned int DwarfCFIToModule::RegisterNames::ARM() { + /* + 8 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + 8 "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc", + 8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + 8 "fps", "cpsr", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + 8 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + 8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + 8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + 8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" + */ + return 13 * 8; +} + +// Per ARM IHI 0057A, section 3.1 +unsigned int DwarfCFIToModule::RegisterNames::ARM64() { + /* + 8 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + 8 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + 8 "x16" "x17", "x18", "x19", "x20", "x21", "x22", "x23", + 8 "x24", "x25", "x26", "x27", "x28", "x29", "x30","sp", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", 
"", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + 8 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + 8 "v16", "v17", "v18", "v19", "v20", "v21", "v22, "v23", + 8 "v24", "x25", "x26, "x27", "v28", "v29", "v30", "v31", + */ + return 12 * 8; +} + +unsigned int DwarfCFIToModule::RegisterNames::MIPS() { + /* + 8 "$zero", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3", + 8 "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + 8 "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7", + 8 "$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra", + 9 "$lo", "$hi", "$pc", "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", + 8 "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", + 7 "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", + 7 "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", + 6 "$f28", "$f29", "$f30", "$f31", "$fcsr", "$fir" + */ + return 8 + 8 + 8 + 8 + 9 + 8 + 7 + 7 + 6; +} + +// See prototype for comments. +int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + ImageSlice expr, bool debug, bool pushCfaAtStart, + bool derefAtEnd) { + const char* cursor = expr.start_; + const char* end1 = cursor + expr.length_; + + char buf[100]; + if (debug) { + SprintfLiteral(buf, "LUL.DW << DwarfExpr, len is %d\n", + (int)(end1 - cursor)); + summ->Log(buf); + } + + // Add a marker for the start of this expression. In it, indicate + // whether or not the CFA should be pushed onto the stack prior to + // evaluation. + int32_t start_ix = + summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0)); + MOZ_ASSERT(start_ix >= 0); + + while (cursor < end1) { + uint8 opc = reader->ReadOneByte(cursor); + cursor++; + + const char* nm = nullptr; + PfxExprOp pxop = PX_End; + + switch (opc) { + case DW_OP_lit0 ... 
DW_OP_lit31: { + int32_t simm32 = (int32_t)(opc - DW_OP_lit0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_lit%d\n", (int)simm32); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32)); + break; + } + + case DW_OP_breg0 ... DW_OP_breg31: { + size_t len; + int64_t n = reader->ReadSignedLEB128(cursor, &len); + cursor += len; + DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_breg%d %lld\n", (int)reg, + (long long int)n); + summ->Log(buf); + } + // PfxInstr only allows a 32 bit signed offset. So we + // must fail if the immediate is out of range. + if (n < INT32_MIN || INT32_MAX < n) goto fail; + (void)summ->AddPfxInstr(PfxInstr(PX_DwReg, reg)); + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n)); + (void)summ->AddPfxInstr(PfxInstr(PX_Add)); + break; + } + + case DW_OP_const4s: { + uint64_t u64 = reader->ReadFourBytes(cursor); + cursor += 4; + // u64 is guaranteed by |ReadFourBytes| to be in the + // range 0 .. FFFFFFFF inclusive. 
But to be safe: + uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF); + int32_t s32 = (int32_t)u32; + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_const4s %d\n", (int)s32); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, s32)); + break; + } + + case DW_OP_deref: + nm = "deref"; + pxop = PX_Deref; + goto no_operands; + case DW_OP_and: + nm = "and"; + pxop = PX_And; + goto no_operands; + case DW_OP_plus: + nm = "plus"; + pxop = PX_Add; + goto no_operands; + case DW_OP_minus: + nm = "minus"; + pxop = PX_Sub; + goto no_operands; + case DW_OP_shl: + nm = "shl"; + pxop = PX_Shl; + goto no_operands; + case DW_OP_ge: + nm = "ge"; + pxop = PX_CmpGES; + goto no_operands; + no_operands: + MOZ_ASSERT(nm && pxop != PX_End); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_%s\n", nm); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(pxop)); + break; + + default: + if (debug) { + SprintfLiteral(buf, "LUL.DW unknown opc %d\n", (int)opc); + summ->Log(buf); + } + goto fail; + + } // switch (opc) + + } // while (cursor < end1) + + MOZ_ASSERT(cursor >= end1); + + if (cursor > end1) { + // We overran the Dwarf expression. Give up. + goto fail; + } + + // For DW_CFA_expression, what the expression denotes is the address + // of where the previous value is located. The caller of this routine + // may therefore request one last dereference before the end marker is + // inserted. + if (derefAtEnd) { + (void)summ->AddPfxInstr(PfxInstr(PX_Deref)); + } + + // Insert an end marker, and declare success. 
+ (void)summ->AddPfxInstr(PfxInstr(PX_End)); + if (debug) { + SprintfLiteral(buf, + "LUL.DW conversion of dwarf expression succeeded, " + "ix = %d\n", + (int)start_ix); + summ->Log(buf); + summ->Log("LUL.DW >>\n"); + } + return start_ix; + +fail: + if (debug) { + summ->Log("LUL.DW conversion of dwarf expression failed\n"); + summ->Log("LUL.DW >>\n"); + } + return -1; +} + +bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string& augmentation, + unsigned return_address) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n", + address, length); + summ_->Log(buf); + } + + summ_->Entry(address, length); + + // If dwarf2reader::CallFrameInfo can handle this version and + // augmentation, then we should be okay with that, so there's no + // need to check them here. + + // Get ready to collect entries. + return_address_ = return_address; + + // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI + // may not establish any rule for .ra if the return address column + // is an ordinary register, and that register holds the return + // address on entry to the function. So establish an initial .ra + // rule citing the return address register. + if (return_address_ < num_dw_regs_) { + summ_->Rule(address, return_address_, NODEREF, return_address, 0); + } + + return true; +} + +const UniqueString* DwarfCFIToModule::RegisterName(int i) { + if (i < 0) { + MOZ_ASSERT(i == kCFARegister); + return usu_->ToUniqueString(".cfa"); + } + unsigned reg = i; + if (reg == return_address_) return usu_->ToUniqueString(".ra"); + + char buf[30]; + SprintfLiteral(buf, "dwarf_reg_%u", reg); + return usu_->ToUniqueString(buf); +} + +bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) { + reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. 
+ return true; +} + +bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = Same\n", address, reg); + summ_->Log(buf); + } + // reg + 0 + summ_->Rule(address, reg, NODEREF, reg, 0); + return true; +} + +bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, int base_register, + long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = *(r%d + %ld)\n", address, + reg, base_register, offset); + summ_->Log(buf); + } + // *(base_register + offset) + summ_->Rule(address, reg, DEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, int base_register, + long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d + %ld\n", address, reg, + base_register, offset); + summ_->Log(buf); + } + // base_register + offset + summ_->Rule(address, reg, NODEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::RegisterRule(uint64 address, int reg, + int base_register) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d\n", address, reg, + base_register); + summ_->Log(buf); + } + // base_register + 0 + summ_->Rule(address, reg, NODEREF, base_register, 0); + return true; +} + +bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg, + const ImageSlice& expression) { + bool debug = !!DEBUG_DWARF; + int32_t start_ix = + parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/, + true /*derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. 
+ reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg)); + } + return true; +} + +bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg, + const ImageSlice& expression) { + bool debug = !!DEBUG_DWARF; + int32_t start_ix = + parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/, + false /*!derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. + reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg)); + } + return true; +} + +bool DwarfCFIToModule::End() { + // module_->AddStackFrameEntry(entry_); + if (DEBUG_DWARF) { + summ_->Log("LUL.DW DwarfCFIToModule::End()\n"); + } + summ_->End(); + return true; +} + +void DwarfCFIToModule::Reporter::UndefinedNotSupported( + size_t offset, const UniqueString* reg) { + char buf[300]; + SprintfLiteral(buf, "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n"); + log_(buf); + // BPLOG(INFO) << file_ << ", section '" << section_ + // << "': the call frame entry at offset 0x" + // << std::setbase(16) << offset << std::setbase(10) + // << " sets the rule for register '" << FromUniqueString(reg) + // << "' to 'undefined', but the Breakpad symbol file format cannot " + // << " express this"; +} + +// FIXME: move this somewhere sensible +static bool is_power_of_2(uint64_t n) { + int i, nSetBits = 0; + for (i = 0; i < 8 * (int)sizeof(n); i++) { + if ((n & ((uint64_t)1) << i) != 0) nSetBits++; + } + return nSetBits <= 1; +} + +void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised( + size_t offset, const UniqueString* reg) { + static uint64_t n_complaints = 0; // This isn't threadsafe + n_complaints++; + if (!is_power_of_2(n_complaints)) return; + char buf[300]; + SprintfLiteral(buf, + "DwarfCFIToModule::Reporter::" + "ExpressionCouldNotBeSummarised(shown %llu times)\n", + (unsigned long 
long int)n_complaints); + log_(buf); +} + +} // namespace lul diff --git a/tools/profiler/lul/LulDwarfExt.h b/tools/profiler/lul/LulDwarfExt.h new file mode 100644 index 0000000000..4ee6fe17a8 --- /dev/null +++ b/tools/profiler/lul/LulDwarfExt.h @@ -0,0 +1,1312 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright 2006, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Original author: Jim Blandy + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/types.h +// src/common/dwarf/dwarf2enums.h +// src/common/dwarf/bytereader.h +// src/common/dwarf_cfi_to_module.h +// src/common/dwarf/dwarf2reader.h + +#ifndef LulDwarfExt_h +#define LulDwarfExt_h + +#include "LulDwarfSummariser.h" + +#include "mozilla/Assertions.h" + +#include +#include + +typedef signed char int8; +typedef short int16; +typedef int int32; +typedef long long int64; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long long uint64; + +#ifdef __PTRDIFF_TYPE__ +typedef __PTRDIFF_TYPE__ intptr; +typedef unsigned __PTRDIFF_TYPE__ uintptr; +#else +# error "Can't find pointer-sized integral types." +#endif + +namespace lul { + +class UniqueString; + +// This represents a read-only slice of the "image" (the temporarily mmaped-in +// .so). It is used for representing byte ranges containing Dwarf expressions. +// Note that equality (operator==) is on slice contents, not slice locations. +struct ImageSlice { + const char* start_; + size_t length_; + ImageSlice() : start_(0), length_(0) {} + ImageSlice(const char* start, size_t length) + : start_(start), length_(length) {} + // Make one from a C string (for testing only). Note, the terminating zero + // is not included in the length. + explicit ImageSlice(const char* cstring) + : start_(cstring), length_(strlen(cstring)) {} + explicit ImageSlice(const std::string& str) + : start_(str.c_str()), length_(str.length()) {} + ImageSlice(const ImageSlice& other) + : start_(other.start_), length_(other.length_) {} + ImageSlice(ImageSlice& other) + : start_(other.start_), length_(other.length_) {} + bool operator==(const ImageSlice& other) const { + if (length_ != other.length_) { + return false; + } + // This relies on the fact that that memcmp returns zero whenever length_ + // is zero. 
+ return memcmp(start_, other.start_, length_) == 0; + } +}; + +// Exception handling frame description pointer formats, as described +// by the Linux Standard Base Core Specification 4.0, section 11.5, +// DWARF Extensions. +enum DwarfPointerEncoding { + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + + // The GNU toolchain sources define this enum value as well, + // simply to help classify the lower nybble values into signed and + // unsigned groups. + DW_EH_PE_signed = 0x08, + + // This is not documented in LSB 4.0, but it is used in both the + // Linux and OS X toolchains. It can be added to any other + // encoding (except DW_EH_PE_aligned), and indicates that the + // encoded value represents the address at which the true address + // is stored, not the true address itself. + DW_EH_PE_indirect = 0x80 +}; + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { ENDIANNESS_BIG, ENDIANNESS_LITTLE }; + +// A ByteReader knows how to read single- and multi-byte values of +// various endiannesses, sizes, and encodings, as used in DWARF +// debugging information and Linux C++ exception handling data. +class ByteReader { + public: + // Construct a ByteReader capable of reading one-, two-, four-, and + // eight-byte values according to ENDIANNESS, absolute machine-sized + // addresses, DWARF-style "initial length" values, signed and + // unsigned LEB128 numbers, and Linux C++ exception handling data's + // encoded pointers. 
+ explicit ByteReader(enum Endianness endianness); + virtual ~ByteReader(); + + // Read a single byte from BUFFER and return it as an unsigned 8 bit + // number. + uint8 ReadOneByte(const char* buffer) const; + + // Read two bytes from BUFFER and return them as an unsigned 16 bit + // number, using this ByteReader's endianness. + uint16 ReadTwoBytes(const char* buffer) const; + + // Read four bytes from BUFFER and return them as an unsigned 32 bit + // number, using this ByteReader's endianness. This function returns + // a uint64 so that it is compatible with ReadAddress and + // ReadOffset. The number it returns will never be outside the range + // of an unsigned 32 bit integer. + uint64 ReadFourBytes(const char* buffer) const; + + // Read eight bytes from BUFFER and return them as an unsigned 64 + // bit number, using this ByteReader's endianness. + uint64 ReadEightBytes(const char* buffer) const; + + // Read an unsigned LEB128 (Little Endian Base 128) number from + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to + // the number of bytes read. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; + + // Read a signed LEB128 number from BUFFER and return it as an + // signed 64 bit integer. Set LEN to the number of bytes read. 
+ // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N in two's + // complement. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; + + // Indicate that addresses on this architecture are SIZE bytes long. SIZE + // must be either 4 or 8. (DWARF allows addresses to be any number of + // bytes in length from 1 to 255, but we only support 32- and 64-bit + // addresses at the moment.) You must call this before using the + // ReadAddress member function. + // + // For data in a .debug_info section, or something that .debug_info + // refers to like line number or macro data, the compilation unit + // header's address_size field indicates the address size to use. Call + // frame information doesn't indicate its address size (a shortcoming of + // the spec); you must supply the appropriate size based on the + // architecture of the target machine. + void SetAddressSize(uint8 size); + + // Return the current address size, in bytes. This is either 4, + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. + uint8 AddressSize() const { return address_size_; } + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer, respecting this ByteReader's endianness and address size. You + // must call SetAddressSize before calling this function. + uint64 ReadAddress(const char* buffer) const; + + // DWARF actually defines two slightly different formats: 32-bit DWARF + // and 64-bit DWARF. 
This is *not* related to the size of registers or + // addresses on the target machine; it refers only to the size of section + // offsets and data lengths appearing in the DWARF data. One only needs + // 64-bit DWARF when the debugging data itself is larger than 4GiB. + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the + // debugging data itself is very large. + // + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each + // compilation unit and call frame information entry begins with an + // "initial length" field, which, in addition to giving the length of the + // data, also indicates the size of section offsets and lengths appearing + // in that data. The ReadInitialLength member function, below, reads an + // initial length and sets the ByteReader's offset size as a side effect. + // Thus, in the normal process of reading DWARF data, the appropriate + // offset size is set automatically. So, you should only need to call + // SetOffsetSize if you are using the same ByteReader to jump from the + // midst of one block of DWARF data into another. + + // Read a DWARF "initial length" field from START, and return it as + // an unsigned 64 bit integer, respecting this ByteReader's + // endianness. Set *LEN to the length of the initial length in + // bytes, either four or twelve. As a side effect, set this + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF + // initial length) or 8 (if we see a 64-bit DWARF initial length). + // + // A DWARF initial length is either: + // + // - a byte count stored as an unsigned 32-bit value less than + // 0xffffff00, indicating that the data whose length is being + // measured uses the 32-bit DWARF format, or + // + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, + // indicating that the data whose length is being measured uses + // the 64-bit DWARF format. 
+ uint64 ReadInitialLength(const char* start, size_t* len); + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes + // long. You must call ReadInitialLength or SetOffsetSize before calling + // this function; see the comments above for details. + uint64 ReadOffset(const char* buffer) const; + + // Return the current offset size, in bytes. + // A return value of 4 indicates that we are reading 32-bit DWARF. + // A return value of 8 indicates that we are reading 64-bit DWARF. + uint8 OffsetSize() const { return offset_size_; } + + // Indicate that section offsets and lengths are SIZE bytes long. SIZE + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). + // Usually, you should not call this function yourself; instead, let a + // call to ReadInitialLength establish the data's offset size + // automatically. + void SetOffsetSize(uint8 size); + + // The Linux C++ ABI uses a variant of DWARF call frame information + // for exception handling. This data is included in the program's + // address space as the ".eh_frame" section, and intepreted at + // runtime to walk the stack, find exception handlers, and run + // cleanup code. The format is mostly the same as DWARF CFI, with + // some adjustments made to provide the additional + // exception-handling data, and to make the data easier to work with + // in memory --- for example, to allow it to be placed in read-only + // memory even when describing position-independent code. + // + // In particular, exception handling data can select a number of + // different encodings for pointers that appear in the data, as + // described by the DwarfPointerEncoding enum. There are actually + // four axes(!) to the encoding: + // + // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use + // the DWARF LEB128 encoding. 
+ // + // - The pointer's signedness: pointers can be signed or unsigned. + // + // - The pointer's base address: the data stored in the exception + // handling data can be the actual address (that is, an absolute + // pointer), or relative to one of a number of different base + // addreses --- including that of the encoded pointer itself, for + // a form of "pc-relative" addressing. + // + // - The pointer may be indirect: it may be the address where the + // true pointer is stored. (This is used to refer to things via + // global offset table entries, program linkage table entries, or + // other tricks used in position-independent code.) + // + // There are also two options that fall outside that matrix + // altogether: the pointer may be omitted, or it may have padding to + // align it on an appropriate address boundary. (That last option + // may seem like it should be just another axis, but it is not.) + + // Indicate that the exception handling data is loaded starting at + // SECTION_BASE, and that the start of its buffer in our own memory + // is BUFFER_BASE. This allows us to find the address that a given + // byte in our buffer would have when loaded into the program the + // data describes. We need this to resolve DW_EH_PE_pcrel pointers. + void SetCFIDataBase(uint64 section_base, const char* buffer_base); + + // Indicate that the base address of the program's ".text" section + // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. + void SetTextBase(uint64 text_base); + + // Indicate that the base address for DW_EH_PE_datarel pointers is + // DATA_BASE. The proper value depends on the ABI; it is usually the + // address of the global offset table, held in a designated register in + // position-independent code. You will need to look at the startup code + // for the target system to be sure. I tried; my eyes bled. + void SetDataBase(uint64 data_base); + + // Indicate that the base address for the FDE we are processing is + // FUNCTION_BASE. 
This is the start address of DW_EH_PE_funcrel + // pointers. (This encoding does not seem to be used by the GNU + // toolchain.) + void SetFunctionBase(uint64 function_base); + + // Indicate that we are no longer processing any FDE, so any use of + // a DW_EH_PE_funcrel encoding is an error. + void ClearFunctionBase(); + + // Return true if ENCODING is a valid pointer encoding. + bool ValidEncoding(DwarfPointerEncoding encoding) const; + + // Return true if we have all the information we need to read a + // pointer that uses ENCODING. This checks that the appropriate + // SetFooBase function for ENCODING has been called. + bool UsableEncoding(DwarfPointerEncoding encoding) const; + + // Read an encoded pointer from BUFFER using ENCODING; return the + // absolute address it represents, and set *LEN to the pointer's + // length in bytes, including any padding for aligned pointers. + // + // This function calls 'abort' if ENCODING is invalid or refers to a + // base address this reader hasn't been given, so you should check + // with ValidEncoding and UsableEncoding first if you would rather + // die in a more helpful way. + uint64 ReadEncodedPointer(const char* buffer, DwarfPointerEncoding encoding, + size_t* len) const; + + private: + // Function pointer type for our address and offset readers. + typedef uint64 (ByteReader::*AddressReader)(const char*) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + // This function pointer gets set by SetOffsetSize. + AddressReader offset_reader_; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + // This function pointer gets set by SetAddressSize. 
+ AddressReader address_reader_; + + Endianness endian_; + uint8 address_size_; + uint8 offset_size_; + + // Base addresses for Linux C++ exception handling data's encoded pointers. + bool have_section_base_, have_text_base_, have_data_base_; + bool have_function_base_; + uint64 section_base_; + uint64 text_base_, data_base_, function_base_; + const char* buffer_base_; +}; + +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { + return buffer[0]; +} + +inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const { + const unsigned char* buffer = + reinterpret_cast(signed_buffer); + const uint16 buffer0 = buffer[0]; + const uint16 buffer1 = buffer[1]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const { + const unsigned char* buffer = + reinterpret_cast(signed_buffer); + const uint32 buffer0 = buffer[0]; + const uint32 buffer1 = buffer[1]; + const uint32 buffer2 = buffer[2]; + const uint32 buffer3 = buffer[3]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + +inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const { + const unsigned char* buffer = + reinterpret_cast(signed_buffer); + const uint64 buffer0 = buffer[0]; + const uint64 buffer1 = buffer[1]; + const uint64 buffer2 = buffer[2]; + const uint64 buffer3 = buffer[3]; + const uint64 buffer4 = buffer[4]; + const uint64 buffer5 = buffer[5]; + const uint64 buffer6 = buffer[6]; + const uint64 buffer7 = buffer[7]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; + } else { + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | + buffer3 << 32 | buffer2 << 40 | 
buffer1 << 48 | buffer0 << 56; + } +} + +// Read an unsigned LEB128 number. Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, + size_t* len) const { + uint64 result = 0; + size_t num_read = 0; + unsigned int shift = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. + +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned int shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof(result)) && (byte & 0x40)) + result |= -((static_cast(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + MOZ_ASSERT(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + MOZ_ASSERT(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +inline void ByteReader::SetCFIDataBase(uint64 section_base, + const char* buffer_base) { + section_base_ = section_base; + buffer_base_ = buffer_base; + have_section_base_ = true; +} + +inline void ByteReader::SetTextBase(uint64 text_base) { + text_base_ = text_base; + have_text_base_ = true; +} + +inline void ByteReader::SetDataBase(uint64 data_base) { + data_base_ = data_base; + have_data_base_ = true; +} + +inline void ByteReader::SetFunctionBase(uint64 function_base) { + function_base_ = function_base; + have_function_base_ = true; +} + +inline void 
ByteReader::ClearFunctionBase() { have_function_base_ = false; } + +// (derived from) +// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which +// accepts parsed DWARF call frame info and adds it to a Summariser object. + +// This class is a reader for DWARF's Call Frame Information. CFI +// describes how to unwind stack frames --- even for functions that do +// not follow fixed conventions for saving registers, whose frame size +// varies as they execute, etc. +// +// CFI describes, at each machine instruction, how to compute the +// stack frame's base address, how to find the return address, and +// where to find the saved values of the caller's registers (if the +// callee has stashed them somewhere to free up the registers for its +// own use). +// +// For example, suppose we have a function whose machine code looks +// like this (imagine an assembly language that looks like C, for a +// machine with 32-bit registers, and a stack that grows towards lower +// addresses): +// +// func: ; entry point; return address at sp +// func+0: sp = sp - 16 ; allocate space for stack frame +// func+1: sp[12] = r0 ; save r0 at sp+12 +// ... ; other code, not frame-related +// func+10: sp -= 4; *sp = x ; push some x on the stack +// ... ; other code, not frame-related +// func+20: r0 = sp[16] ; restore saved r0 +// func+21: sp += 20 ; pop whole stack frame +// func+22: pc = *sp; sp += 4 ; pop return address and jump to it +// +// DWARF CFI is (a very compressed representation of) a table with a +// row for each machine instruction address and a column for each +// register showing how to restore it, if possible. +// +// A special column named "CFA", for "Canonical Frame Address", tells how +// to compute the base address of the frame; registers' entries may +// refer to the CFA in describing where the registers are saved. +// +// Another special column, named "RA", represents the return address. 
+// +// For example, here is a complete (uncompressed) table describing the +// function above: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 cfa[0] +// func+2: sp+16 cfa[-4] cfa[0] +// func+11: sp+20 cfa[-4] cfa[0] +// func+21: sp+20 cfa[0] +// func+22: sp cfa[0] +// +// Some things to note here: +// +// - Each row describes the state of affairs *before* executing the +// instruction at the given address. Thus, the row for func+0 +// describes the state before we allocate the stack frame. In the +// next row, the formula for computing the CFA has changed, +// reflecting that allocation. +// +// - The other entries are written in terms of the CFA; this allows +// them to remain unchanged as the stack pointer gets bumped around. +// For example, the rule for recovering the return address (the "ra" +// column) remains unchanged throughout the function, even as the +// stack pointer takes on three different offsets from the return +// address. +// +// - Although we haven't shown it, most calling conventions designate +// "callee-saves" and "caller-saves" registers. The callee must +// preserve the values of callee-saves registers; if it uses them, +// it must save their original values somewhere, and restore them +// before it returns. In contrast, the callee is free to trash +// caller-saves registers; if the callee uses these, it will +// probably not bother to save them anywhere, and the CFI will +// probably mark their values as "unrecoverable". +// +// (However, since the caller cannot assume the callee was going to +// save them, caller-saves registers are probably dead in the caller +// anyway, so compilers usually don't generate CFA for caller-saves +// registers.) +// +// - Exactly where the CFA points is a matter of convention that +// depends on the architecture and ABI in use. 
In the example, the +// CFA is the value the stack pointer had upon entry to the +// function, pointing at the saved return address. But on the x86, +// the call frame information generated by GCC follows the +// convention that the CFA is the address *after* the saved return +// address. +// +// But by definition, the CFA remains constant throughout the +// lifetime of the frame. This makes it a useful value for other +// columns to refer to. It also gives debuggers a useful handle +// for identifying a frame. +// +// If you look at the table above, you'll notice that a given entry is +// often the same as the one immediately above it: most instructions +// change only one or two aspects of the stack frame, if they affect +// it at all. The DWARF format takes advantage of this fact, and +// reduces the size of the data by mentioning only the addresses and +// columns at which changes take place. So for the above, DWARF CFI +// data would only actually mention the following: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 +// func+2: cfa[-4] +// func+11: sp+20 +// func+21: r0 +// func+22: sp +// +// In fact, this is the way the parser reports CFI to the consumer: as +// a series of statements of the form, "At address X, column Y changed +// to Z," and related conventions for describing the initial state. +// +// Naturally, it would be impractical to have to scan the entire +// program's CFI, noting changes as we go, just to recover the +// unwinding rules in effect at one particular instruction. To avoid +// this, CFI data is grouped into "entries", each of which covers a +// specified range of addresses and begins with a complete statement +// of the rules for all recoverable registers at that starting +// address. Each entry typically covers a single function.
+// +// Thus, to compute the contents of a given row of the table --- that +// is, rules for recovering the CFA, RA, and registers at a given +// instruction --- the consumer should find the entry that covers that +// instruction's address, start with the initial state supplied at the +// beginning of the entry, and work forward until it has processed all +// the changes up to and including those for the present instruction. +// +// There are seven kinds of rules that can appear in an entry of the +// table: +// +// - "undefined": The given register is not preserved by the callee; +// its value cannot be recovered. +// +// - "same value": This register has the same value it did in the callee. +// +// - offset(N): The register is saved at offset N from the CFA. +// +// - val_offset(N): The value the register had in the caller is the +// CFA plus offset N. (This is usually only useful for describing +// the stack pointer.) +// +// - register(R): The register's value was saved in another register R. +// +// - expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the address at which the +// register was saved. +// +// - val_expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the value the register +// had in the caller. + +class CallFrameInfo { + public: + // The different kinds of entries one finds in CFI. Used internally, + // and for error reporting. + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; + + // The handler class to which the parser hands the parsed call frame + // information. Defined below. + class Handler; + + // A reporter class, which CallFrameInfo uses to report errors + // encountered while parsing call frame information. Defined below. + class Reporter; + + // Create a DWARF CFI parser. BUFFER points to the contents of the + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 
+ // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. + // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses.
See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. + // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html + + CallFrameInfo(const char* buffer, size_t buffer_length, ByteReader* reader, + Handler* handler, Reporter* reporter, bool eh_frame = false) + : buffer_(buffer), + buffer_length_(buffer_length), + reader_(reader), + handler_(handler), + reporter_(reporter), + eh_frame_(eh_frame) {} + + ~CallFrameInfo() {} + + // Parse the entries in BUFFER, reporting what we find to HANDLER. + // Return true if we reach the end of the section successfully, or + // false if we encounter an error. + bool Start(); + + // Return the textual name of KIND. For error reporting. 
+ static const char* KindName(EntryKind kind); + + private: + struct CIE; + + // A CFI entry, either an FDE or a CIE. + struct Entry { + // The starting offset of the entry in the section, for error + // reporting. + size_t offset; + + // The start of this entry in the buffer. + const char* start; + + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + + // The end of this entry's common prologue (initial length and id), and + // the start of this entry's kind-specific fields. + const char* fields; + + // The start of this entry's instructions. + const char* instructions; + + // The address past the entry's last byte in the buffer. (Note that + // since offset points to the entry's initial length field, and the + // length field is the number of bytes after that field, this is not + // simply buffer_ + offset + length.) + const char* end; + + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. + uint64 id; + + // The CIE that applies to this entry, if we've parsed it. If this is a + // CIE, then this field points to this structure. + CIE* cie; + }; + + // A common information entry (CIE). + struct CIE : public Entry { + uint8 version; // CFI data version number + std::string augmentation; // vendor format extension markers + uint64 code_alignment_factor; // scale for code address adjustments + int data_alignment_factor; // scale for stack pointer adjustments + unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. 
+ bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. + DwarfPointerEncoding pointer_encoding; + }; + + // A frame description entry (FDE). + struct FDE : public Entry { + uint64 address; // start address of described code + uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; + }; + + // Internal use. + class Rule; + class RuleMapLowLevel; + class RuleMap; + class State; + + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. 
+ bool ReadEntryPrologue(const char* cursor, Entry* entry); + + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. + bool ReadCIEFields(CIE* cie); + + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. + bool ReadFDEFields(FDE* fde); + + // Report that ENTRY is incomplete, and return false. This is just a + // trivial wrapper for invoking reporter_->Incomplete; it provides a + // little brevity. + bool ReportIncomplete(Entry* entry); + + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + + // The contents of the DWARF .debug_info section we're parsing. + const char* buffer_; + size_t buffer_length_; + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. + Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; +}; + +// The handler class for CallFrameInfo. The a CFI parser calls the +// member functions of a handler object to report the data it finds. +class CallFrameInfo::Handler { + public: + // The pseudo-register number for the canonical frame address. 
+ enum { kCFARegister = DW_REG_CFA }; + + Handler() {} + virtual ~Handler() {} + + // The parser has found CFI for the machine code at ADDRESS, + // extending for LENGTH bytes. OFFSET is the offset of the frame + // description entry in the section, for use in error messages. + // VERSION is the version number of the CFI format. AUGMENTATION is + // a string describing any producer-specific extensions present in + // the data. RETURN_ADDRESS is the number of the register that holds + // the address to which the function should return. + // + // Entry should return true to process this CFI, or false to skip to + // the next entry. + // + // The parser invokes Entry for each Frame Description Entry (FDE) + // it finds. The parser doesn't report Common Information Entries + // to the handler explicitly; instead, if the handler elects to + // process a given FDE, the parser reiterates the appropriate CIE's + // contents at the beginning of the FDE's rules. + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string& augmentation, + unsigned return_address) = 0; + + // When the Entry function returns true, the parser calls these + // handler functions repeatedly to describe the rules for recovering + // registers at each instruction in the given range of machine code. + // Immediately after a call to Entry, the handler should assume that + // the rule for each callee-saves register is "unchanged" --- that + // is, that the register still has the value it had in the caller. + // + // If a *Rule function returns true, we continue processing this entry's + // instructions. If a *Rule function returns false, we stop evaluating + // instructions, and skip to the next entry. Either way, we call End + // before going on to the next entry. + // + // In all of these functions, if the REG parameter is kCFARegister, then + // the rule describes how to find the canonical frame address. 
+ // kCFARegister may be passed as a BASE_REGISTER argument, meaning that + // the canonical frame address should be used as the base address for the + // computation. All other REG values will be positive. + + // At ADDRESS, register REG's value is not recoverable. + virtual bool UndefinedRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG's value is the same as that it had in + // the caller. + virtual bool SameValueRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG has been saved at offset OFFSET from + // BASE_REGISTER. + virtual bool OffsetRule(uint64 address, int reg, int base_register, + long offset) = 0; + + // At ADDRESS, the caller's value of register REG is the current + // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an + // address at which the register's value is saved.) + virtual bool ValOffsetRule(uint64 address, int reg, int base_register, + long offset) = 0; + + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that + // BASE_REGISTER is the "home" for REG's saved value: if you want to + // assign to a variable whose home is REG in the calling frame, you + // should put the value in BASE_REGISTER. + virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the address at + // which REG was saved. + virtual bool ExpressionRule(uint64 address, int reg, + const ImageSlice& expression) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's + // value for REG. (This rule doesn't provide an address at which the + // register's value is saved.) + virtual bool ValExpressionRule(uint64 address, int reg, + const ImageSlice& expression) = 0; + + // Indicate that the rules for the address range reported by the + // last call to Entry are complete. 
End should return true if + // everything is okay, or false if an error has occurred and parsing + // should stop. + virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. + // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. 
If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } +}; + +// The CallFrameInfo class makes calls on an instance of this class to +// report errors or warn about problems in the data it is parsing. +// These messages are sent to the message sink |aLog| provided to the +// constructor. +class CallFrameInfo::Reporter { + public: + // Create an error reporter which attributes troubles to the section + // named SECTION in FILENAME. + // + // Normally SECTION would be .debug_frame, but the Mac puts CFI data + // in a Mach-O section named __debug_frame. If we support + // Linux-style exception handling data, we could be reading an + // .eh_frame section. + Reporter(void (*aLog)(const char*), const std::string& filename, + const std::string& section = ".debug_frame") + : log_(aLog), filename_(filename), section_(section) {} + virtual ~Reporter() {} + + // The CFI entry at OFFSET ends too early to be well-formed. KIND + // indicates what kind of entry it is; KIND can be kUnknown if we + // haven't parsed enough of the entry to tell yet. + virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. 
However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the + // section is not that large. + virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry + // there is not a CIE. + virtual void BadCIEId(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to a CIE with version number VERSION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // a version number we don't recognize. + virtual void UnrecognizedVersion(uint64 offset, int version); + + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // augmentations we don't recognize. + virtual void UnrecognizedAugmentation(uint64 offset, + const std::string& augmentation); + + // The FDE at OFFSET contains an invalid or otherwise unusable Dwarf4 + // specific field (currently, only "address_size" or "segment_size"). + // Parsing DWARF CFI with unexpected values here seems dubious at best, + // so we stop. WHAT gives a little more information about what is wrong. + virtual void InvalidDwarf4Artefact(uint64 offset, const char* what); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + + // The CIE at OFFSET contains a DW_CFA_restore instruction at + // INSN_OFFSET, which may not appear in a CIE. 
+ virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); + + // The entry at OFFSET, of kind KIND, has an unrecognized + // instruction at INSN_OFFSET. + virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, establishes a rule that cites the CFA, but we have not + // established a CFA rule yet. + virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, is a DW_CFA_restore_state instruction, but the stack of + // saved states is empty. + virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry + // at OFFSET, of kind KIND, would restore a state that has no CFA + // rule, whereas the current state does have a CFA rule. This is + // bogus input, which the CallFrameInfo::Handler interface doesn't + // (and shouldn't) have any way to report. + virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + // The name of the file whose CFI we're reading. + std::string filename_; + + // The name of the CFI section in that file. + std::string section_; +}; + +using lul::CallFrameInfo; +using lul::Summariser; + +// A class that accepts parsed call frame information from the DWARF +// CFI parser and populates a google_breakpad::Module object with the +// contents. +class DwarfCFIToModule : public CallFrameInfo::Handler { + public: + // DwarfCFIToModule uses an instance of this class to report errors + // detected while converting DWARF CFI to Breakpad STACK CFI records. 
+ class Reporter { + public: + // Create a reporter that writes messages to the message sink + // |aLog|. FILE is the name of the file we're processing, and + // SECTION is the name of the section within that file that we're + // looking at (.debug_frame, .eh_frame, etc.). + Reporter(void (*aLog)(const char*), const std::string& file, + const std::string& section) + : log_(aLog), file_(file), section_(section) {} + virtual ~Reporter() {} + + // The DWARF CFI entry at OFFSET says that REG is undefined, but the + // Breakpad symbol file format cannot express this. + virtual void UndefinedNotSupported(size_t offset, const UniqueString* reg); + + // The DWARF CFI entry at OFFSET says that REG uses a DWARF + // expression to find its value, but parseDwarfExpr could not + // convert it to a sequence of PfxInstrs. + virtual void ExpressionCouldNotBeSummarised(size_t offset, + const UniqueString* reg); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + std::string file_, section_; + }; + + // Register name tables. If TABLE is a vector returned by one of these + // functions, then TABLE[R] is the name of the register numbered R in + // DWARF call frame information. + class RegisterNames { + public: + // Intel's "x86" or IA-32. + static unsigned int I386(); + + // AMD x86_64, AMD64, Intel EM64T, or Intel 64 + static unsigned int X86_64(); + + // ARM. + static unsigned int ARM(); + + // AARCH64. + static unsigned int ARM64(); + + // MIPS. + static unsigned int MIPS(); + }; + + // Create a handler for the dwarf2reader::CallFrameInfo parser that + // records the stack unwinding information it receives in SUMM. + // + // Use REGISTER_NAMES[I] as the name of register number I; *this + // keeps a reference to the vector, so the vector should remain + // alive for as long as the DwarfCFIToModule does. + // + // Use REPORTER for reporting problems encountered in the conversion + // process. 
+ DwarfCFIToModule(const unsigned int num_dw_regs, Reporter* reporter, + ByteReader* reader, + /*MOD*/ UniqueStringUniverse* usu, + /*OUT*/ Summariser* summ) + : summ_(summ), + usu_(usu), + num_dw_regs_(num_dw_regs), + reporter_(reporter), + reader_(reader), + return_address_(-1) {} + virtual ~DwarfCFIToModule() {} + + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string& augmentation, + unsigned return_address) override; + virtual bool UndefinedRule(uint64 address, int reg) override; + virtual bool SameValueRule(uint64 address, int reg) override; + virtual bool OffsetRule(uint64 address, int reg, int base_register, + long offset) override; + virtual bool ValOffsetRule(uint64 address, int reg, int base_register, + long offset) override; + virtual bool RegisterRule(uint64 address, int reg, + int base_register) override; + virtual bool ExpressionRule(uint64 address, int reg, + const ImageSlice& expression) override; + virtual bool ValExpressionRule(uint64 address, int reg, + const ImageSlice& expression) override; + virtual bool End() override; + + private: + // Return the name to use for register I. + const UniqueString* RegisterName(int i); + + // The Summariser to which we should give entries + Summariser* summ_; + + // Universe for creating UniqueStrings in, should that be necessary. + UniqueStringUniverse* usu_; + + // The number of Dwarf-defined register names for this architecture. + const unsigned int num_dw_regs_; + + // The reporter to use to report problems. + Reporter* reporter_; + + // The ByteReader to use for parsing Dwarf expressions. + ByteReader* reader_; + + // The section offset of the current frame description entry, for + // use in error messages. + size_t entry_offset_; + + // The return address column for that entry. 
+ unsigned return_address_; +}; + +// Convert the Dwarf expression in |expr| into PfxInstrs stored in the +// SecMap referred to by |summ|, and return the index of the starting +// PfxInstr added, which must be >= 0. In case of failure return -1. +int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + ImageSlice expr, bool debug, bool pushCfaAtStart, + bool derefAtEnd); + +} // namespace lul + +#endif // LulDwarfExt_h diff --git a/tools/profiler/lul/LulDwarfInt.h b/tools/profiler/lul/LulDwarfInt.h new file mode 100644 index 0000000000..b72c6e08e3 --- /dev/null +++ b/tools/profiler/lul/LulDwarfInt.h @@ -0,0 +1,193 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy + +// This file is derived from the following file in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/dwarf2enums.h + +#ifndef LulDwarfInt_h +#define LulDwarfInt_h + +#include "LulCommonExt.h" +#include "LulDwarfExt.h" + +namespace lul { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. + +// Call Frame Info instructions. +enum DwarfCFI { + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + // Opcodes in this range are reserved for user extensions. + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + // SGI/MIPS specific. 
+ DW_CFA_MIPS_advance_loc8 = 0x1d, + + // GNU extensions. + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f +}; + +// Exception handling 'z' augmentation letters. +enum DwarfZAugmentationCodes { + // If the CFI augmentation string begins with 'z', then the CIE and FDE + // have an augmentation data area just before the instructions, whose + // contents are determined by the subsequent augmentation letters. + DW_Z_augmentation_start = 'z', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, and the FDE + // augmentation data includes a language-specific data area pointer, + // represented using that encoding. + DW_Z_has_LSDA = 'L', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, followed by a pointer + // to a personality routine, represented using that encoding. + DW_Z_has_personality_routine = 'P', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding describing how the FDE's + // initial location, address range, and DW_CFA_set_loc operands are + // encoded. + DW_Z_has_FDE_address_encoding = 'R', + + // If this letter is present in a 'z' augmentation string, then code + // addresses covered by FDEs that cite this CIE are signal delivery + // trampolines. Return addresses of frames in trampolines should not be + // adjusted as described in section 6.4.4 of the DWARF 3 spec. 
+ DW_Z_is_signal_trampoline = 'S' +}; + +// Expression opcodes +enum DwarfExpressionOpcodes { + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2f, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_lit0 = 0x30, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, + DW_OP_lo_user = 0xe0, + DW_OP_hi_user = 0xff +}; + +} // namespace lul + +#endif // LulDwarfInt_h diff --git a/tools/profiler/lul/LulDwarfSummariser.cpp b/tools/profiler/lul/LulDwarfSummariser.cpp new file mode 100644 index 0000000000..e9172c3e18 --- /dev/null +++ b/tools/profiler/lul/LulDwarfSummariser.cpp @@ -0,0 +1,549 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulDwarfSummariser.h" + +#include "LulDwarfExt.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +// Set this to 1 for verbose logging +#define DEBUG_SUMMARISER 0 + +namespace lul { + +// Do |s64|'s lowest 32 bits sign extend back to |s64| itself? +static inline bool fitsIn32Bits(int64 s64) { + return s64 == ((s64 & 0xffffffff) ^ 0x80000000) - 0x80000000; +} + +// Check a LExpr prefix expression, starting at pfxInstrs[start] up to +// the next PX_End instruction, to ensure that: +// * It only mentions registers that are tracked on this target +// * The start point is sane +// If the expression is ok, return NULL. Else return a pointer +// a const char* holding a bit of text describing the problem. +static const char* checkPfxExpr(const vector* pfxInstrs, + int64_t start) { + size_t nInstrs = pfxInstrs->size(); + if (start < 0 || start >= (ssize_t)nInstrs) { + return "bogus start point"; + } + size_t i; + for (i = start; i < nInstrs; i++) { + PfxInstr pxi = (*pfxInstrs)[i]; + if (pxi.mOpcode == PX_End) break; + if (pxi.mOpcode == PX_DwReg && + !registerIsTracked((DW_REG_NUMBER)pxi.mOperand)) { + return "uses untracked reg"; + } + } + return nullptr; // success +} + +Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias, + void (*aLog)(const char*)) + : mSecMap(aSecMap), mTextBias(aTextBias), mLog(aLog) { + mCurrAddr = 0; + mMax1Addr = 0; // Gives an empty range. + + // Initialise the running RuleSet to "haven't got a clue" status. 
+ new (&mCurrRules) RuleSet(); +} + +void Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) { + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + SprintfLiteral(buf, "LUL Entry(%llx, %llu)\n", + (unsigned long long int)aAddress, + (unsigned long long int)aLength); + mLog(buf); + } + // This throws away any previous summary, that is, assumes + // that the previous summary, if any, has been properly finished + // by a call to End(). + mCurrAddr = aAddress; + mMax1Addr = aAddress + aLength; + new (&mCurrRules) RuleSet(); +} + +void Summariser::Rule(uintptr_t aAddress, int aNewReg, LExprHow how, + int16_t oldReg, int64_t offset) { + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + if (how == NODEREF || how == DEREF) { + bool deref = how == DEREF; + SprintfLiteral(buf, "LUL 0x%llx old-r%d = %sr%d + %lld%s\n", + (unsigned long long int)aAddress, aNewReg, + deref ? "*(" : "", (int)oldReg, (long long int)offset, + deref ? ")" : ""); + } else if (how == PFXEXPR) { + SprintfLiteral(buf, "LUL 0x%llx old-r%d = pfx-expr-at %lld\n", + (unsigned long long int)aAddress, aNewReg, + (long long int)offset); + } else { + SprintfLiteral(buf, "LUL 0x%llx old-r%d = (invalid LExpr!)\n", + (unsigned long long int)aAddress, aNewReg); + } + mLog(buf); + } + + if (mCurrAddr < aAddress) { + // Flush the existing summary first. + mSecMap->AddRuleSet(&mCurrRules, mCurrAddr, aAddress - mCurrAddr); + if (DEBUG_SUMMARISER) { + mLog("LUL "); + mCurrRules.Print(mCurrAddr, aAddress - mCurrAddr, mLog); + mLog("\n"); + } + mCurrAddr = aAddress; + } + + // If for some reason summarisation fails, either or both of these + // become non-null and point at constant text describing the + // problem. Using two rather than just one avoids complications of + // having to concatenate two strings to produce a complete error message. 
+ const char* reason1 = nullptr; + const char* reason2 = nullptr; + + // |offset| needs to be a 32 bit value that sign extends to 64 bits + // on a 64 bit target. We will need to incorporate |offset| into + // any LExpr made here. So we may as well check it right now. + if (!fitsIn32Bits(offset)) { + reason1 = "offset not in signed 32-bit range"; + goto cant_summarise; + } + + // FIXME: factor out common parts of the arch-dependent summarisers. + +#if defined(GP_ARCH_arm) + + // ----------------- arm ----------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we + // choose to represent are: r7/11/12/13 + offset. The offset + // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM, + // hence there is no need to check it for overflow. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + case DW_REG_ARM_R14: + case DW_REG_ARM_R15: { + // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or + // R15 (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for R7/11/12/13/14/15: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. 
+ const vector* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for R7/11/12/13/14/15: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_ARM_R7: + mCurrRules.mR7expr = expr; + break; + case DW_REG_ARM_R11: + mCurrRules.mR11expr = expr; + break; + case DW_REG_ARM_R12: + mCurrRules.mR12expr = expr; + break; + case DW_REG_ARM_R13: + mCurrRules.mR13expr = expr; + break; + case DW_REG_ARM_R14: + mCurrRules.mR14expr = expr; + break; + case DW_REG_ARM_R15: + mCurrRules.mR15expr = expr; + break; + default: + MOZ_ASSERT(0); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here. This program point + // is reached so often that it causes a flood of "Can't + // summarise" messages. In any case, we don't really care about + // the fact that this summary would produce a new value for a + // register that we're not tracking. We do on the other hand + // care if the summary's expression *uses* a register that we're + // not tracking. But in that case one of the above failures + // should tell us which. + goto cant_summarise; + } + + // Mark callee-saved registers (r4 .. r11) as unchanged, if there is + // no other information about them. FIXME: do this just once, at + // the point where the ruleset is committed. + if (mCurrRules.mR7expr.mHow == UNKNOWN) { + mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0); + } + if (mCurrRules.mR11expr.mHow == UNKNOWN) { + mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0); + } + if (mCurrRules.mR12expr.mHow == UNKNOWN) { + mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0); + } + + // The old r13 (SP) value before the call is always the same as the + // CFA. + mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0); + + // If there's no information about R15 (the return address), say + // it's a copy of R14 (the link register). 
+ if (mCurrRules.mR15expr.mHow == UNKNOWN) { + mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0); + } + +#elif defined(GP_ARCH_arm64) + + // ----------------- arm64 ----------------- // + + switch (aNewReg) { + case DW_REG_CFA: + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_SP: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_X30: + case DW_REG_AARCH64_SP: { + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for X29/X30/SP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. 
+ const vector* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for X29/X30/SP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_AARCH64_X29: + mCurrRules.mX29expr = expr; + break; + case DW_REG_AARCH64_X30: + mCurrRules.mX30expr = expr; + break; + case DW_REG_AARCH64_SP: + mCurrRules.mSPexpr = expr; + break; + default: + MOZ_ASSERT(0); + } + break; + } + default: + // Leave |reason1| and |reason2| unset here, for the reasons explained + // in the analogous point + goto cant_summarise; + } + + if (mCurrRules.mX29expr.mHow == UNKNOWN) { + mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0); + } + if (mCurrRules.mX30expr.mHow == UNKNOWN) { + mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0); + } + // On aarch64, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mSPexpr.mHow == UNKNOWN) { + mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0); + } +#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + + // ---------------- x64/x86 ---------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: { + // This is a rule that defines the CFA. The only forms we choose to + // represent are: = SP+offset, = FP+offset, or =prefix-expr. 
+ switch (how) { + case NODEREF: + if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + break; + case DEREF: + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for CFA: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + } + + case DW_REG_INTEL_XSP: + case DW_REG_INTEL_XBP: + case DW_REG_INTEL_XIP: { + // This is a new rule for XSP, XBP or XIP (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for XSP/XBP/XIP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for XSP/XBP/XIP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_INTEL_XBP: + mCurrRules.mXbpExpr = expr; + break; + case DW_REG_INTEL_XSP: + mCurrRules.mXspExpr = expr; + break; + case DW_REG_INTEL_XIP: + mCurrRules.mXipExpr = expr; + break; + default: + MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + } + + // On Intel, it seems the old SP value before the call is always the + // same as the CFA. 
Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mXspExpr.mHow == UNKNOWN) { + mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for BP when it is unchanged. + if (mCurrRules.mXbpExpr.mHow == UNKNOWN) { + mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0); + } + +#elif defined(GP_ARCH_mips64) + // ---------------- mips ---------------- // + // + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we can + // represent are: = SP+offset or = FP+offset. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + if (oldReg != DW_REG_MIPS_SP && oldReg != DW_REG_MIPS_FP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_MIPS_SP: + case DW_REG_MIPS_FP: + case DW_REG_MIPS_PC: { + // This is a new rule for SP, FP or PC (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for SP/FP/PC: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. 
+ const vector* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for SP/FP/PC: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_MIPS_FP: + mCurrRules.mFPexpr = expr; + break; + case DW_REG_MIPS_SP: + mCurrRules.mSPexpr = expr; + break; + case DW_REG_MIPS_PC: + mCurrRules.mPCexpr = expr; + break; + default: + MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + } + + // On MIPS, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mSPexpr.mHow == UNKNOWN) { + mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for FP when it is unchanged. + if (mCurrRules.mFPexpr.mHow == UNKNOWN) { + mCurrRules.mFPexpr = LExpr(NODEREF, DW_REG_MIPS_FP, 0); + } + +#else + +# error "Unsupported arch" +#endif + + return; + +cant_summarise: + if (reason1 || reason2) { + char buf[200]; + SprintfLiteral(buf, + "LUL can't summarise: " + "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n", + (unsigned long long int)(aAddress - mTextBias), + reason1 ? reason1 : "", reason2 ? 
reason2 : "", + NameOf_LExprHow(how), (unsigned int)oldReg, + (long long int)offset); + mLog(buf); + } +} + +uint32_t Summariser::AddPfxInstr(PfxInstr pfxi) { + return mSecMap->AddPfxInstr(pfxi); +} + +void Summariser::End() { + if (DEBUG_SUMMARISER) { + mLog("LUL End\n"); + } + if (mCurrAddr < mMax1Addr) { + mSecMap->AddRuleSet(&mCurrRules, mCurrAddr, mMax1Addr - mCurrAddr); + if (DEBUG_SUMMARISER) { + mLog("LUL "); + mCurrRules.Print(mCurrAddr, mMax1Addr - mCurrAddr, mLog); + mLog("\n"); + } + } +} + +} // namespace lul diff --git a/tools/profiler/lul/LulDwarfSummariser.h b/tools/profiler/lul/LulDwarfSummariser.h new file mode 100644 index 0000000000..30f1ba23c1 --- /dev/null +++ b/tools/profiler/lul/LulDwarfSummariser.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulDwarfSummariser_h +#define LulDwarfSummariser_h + +#include "LulMainInt.h" + +namespace lul { + +class Summariser { + public: + Summariser(SecMap* aSecMap, uintptr_t aTextBias, void (*aLog)(const char*)); + + virtual void Entry(uintptr_t aAddress, uintptr_t aLength); + virtual void End(); + + // Tell the summariser that the value for |aNewReg| at |aAddress| is + // recovered using the LExpr that can be constructed using the + // components |how|, |oldReg| and |offset|. The summariser will + // inspect the components and may reject them for various reasons, + // but the hope is that it will find them acceptable and record this + // rule permanently. + virtual void Rule(uintptr_t aAddress, int aNewReg, LExprHow how, + int16_t oldReg, int64_t offset); + + virtual uint32_t AddPfxInstr(PfxInstr pfxi); + + // Send output to the logging sink, for debugging. 
+ virtual void Log(const char* str) { mLog(str); } + + private: + // The SecMap in which we park the finished summaries (RuleSets) and + // also any PfxInstrs derived from Dwarf expressions. + SecMap* mSecMap; + + // Running state for the current summary (RuleSet) under construction. + RuleSet mCurrRules; + + // The start of the address range to which the RuleSet under + // construction applies. + uintptr_t mCurrAddr; + + // The highest address, plus one, for which the RuleSet under + // construction could possibly apply. If there are no further + // incoming events then mCurrRules will eventually be emitted + // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is + // nonempty. + uintptr_t mMax1Addr; + + // The bias value (to add to the SVMAs, to get AVMAs) to be used + // when adding entries into mSecMap. + uintptr_t mTextBias; + + // A logging sink, for debugging. + void (*mLog)(const char* aFmt); +}; + +} // namespace lul + +#endif // LulDwarfSummariser_h diff --git a/tools/profiler/lul/LulElf.cpp b/tools/profiler/lul/LulElf.cpp new file mode 100644 index 0000000000..28980a1349 --- /dev/null +++ b/tools/profiler/lul/LulElf.cpp @@ -0,0 +1,887 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Restructured in 2009 by: Jim Blandy + +// (derived from) +// dump_symbols.cc: implement google_breakpad::WriteSymbolFile: +// Find all the debugging info in a file and dump it as a Breakpad symbol file. +// +// dump_symbols.h: Read debugging information from an ELF file, and write +// it out as a Breakpad symbol file. 
+ +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.cc +// src/common/linux/elfutils.cc +// src/common/linux/file_id.cc + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "PlatformMacros.h" +#include "LulCommonExt.h" +#include "LulDwarfExt.h" +#include "LulElfInt.h" +#include "LulMainInt.h" + +#if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) +// bionic and older glibsc don't define it +# define SHT_ARM_EXIDX (SHT_LOPROC + 1) +#endif + +#if (defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)) && \ + !defined(SHT_X86_64_UNWIND) +// This is sometimes necessary on x86_64-android and x86_64-linux. +# define SHT_X86_64_UNWIND 0x70000001 +#endif + +// Old Linux header doesn't define EM_AARCH64 +#ifndef EM_AARCH64 +# define EM_AARCH64 183 +#endif + +// This namespace contains helper functions. +namespace { + +using lul::DwarfCFIToModule; +using lul::FindElfSectionByName; +using lul::GetOffset; +using lul::IsValidElf; +using lul::Module; +using lul::scoped_ptr; +using lul::Summariser; +using lul::UniqueStringUniverse; +using std::set; +using std::string; +using std::vector; + +// +// FDWrapper +// +// Wrapper class to make sure opened file is closed. +// +class FDWrapper { + public: + explicit FDWrapper(int fd) : fd_(fd) {} + ~FDWrapper() { + if (fd_ != -1) close(fd_); + } + int get() { return fd_; } + int release() { + int fd = fd_; + fd_ = -1; + return fd; + } + + private: + int fd_; +}; + +// +// MmapWrapper +// +// Wrapper class to make sure mapped regions are unmapped. 
+// +class MmapWrapper { + public: + MmapWrapper() : is_set_(false), base_(NULL), size_(0) {} + ~MmapWrapper() { + if (is_set_ && base_ != NULL) { + MOZ_ASSERT(size_ > 0); + munmap(base_, size_); + } + } + void set(void* mapped_address, size_t mapped_size) { + is_set_ = true; + base_ = mapped_address; + size_ = mapped_size; + } + void release() { + MOZ_ASSERT(is_set_); + is_set_ = false; + base_ = NULL; + size_ = 0; + } + + private: + bool is_set_; + void* base_; + size_t size_; +}; + +// Set NUM_DW_REGNAMES to be the number of Dwarf register names +// appropriate to the machine architecture given in HEADER. Return +// true on success, or false if HEADER's machine architecture is not +// supported. +template +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, + unsigned int* num_dw_regnames) { + switch (elf_header->e_machine) { + case EM_386: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); + return true; + case EM_ARM: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); + return true; + case EM_X86_64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); + return true; + case EM_MIPS: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS(); + return true; + case EM_AARCH64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64(); + return true; + default: + MOZ_ASSERT(0); + return false; + } +} + +template +bool LoadDwarfCFI(const string& dwarf_filename, + const typename ElfClass::Ehdr* elf_header, + const char* section_name, + const typename ElfClass::Shdr* section, const bool eh_frame, + const typename ElfClass::Shdr* got_section, + const typename ElfClass::Shdr* text_section, + const bool big_endian, SecMap* smap, uintptr_t text_bias, + UniqueStringUniverse* usu, void (*log)(const char*)) { + // Find the appropriate set of register names for this file's + // architecture. 
+ unsigned int num_dw_regs = 0; + if (!DwarfCFIRegisterNames(elf_header, &num_dw_regs)) { + fprintf(stderr, + "%s: unrecognized ELF machine architecture '%d';" + " cannot convert DWARF call frame information\n", + dwarf_filename.c_str(), elf_header->e_machine); + return false; + } + + const lul::Endianness endianness = + big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; + + // Find the call frame information and its size. + const char* cfi = GetOffset(elf_header, section->sh_offset); + size_t cfi_size = section->sh_size; + + // Plug together the parser, handler, and their entourages. + + // Here's a summariser, which will receive the output of the + // parser, create summaries, and add them to |smap|. + Summariser summ(smap, text_bias, log); + + lul::ByteReader reader(endianness); + reader.SetAddressSize(ElfClass::kAddrSize); + + DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); + DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ); + + // Provide the base addresses for .eh_frame encoded pointers, if + // possible. 
+ reader.SetCFIDataBase(section->sh_addr, cfi); + if (got_section) reader.SetDataBase(got_section->sh_addr); + if (text_section) reader.SetTextBase(text_section->sh_addr); + + lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, + section_name); + lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter, + eh_frame); + parser.Start(); + + return true; +} + +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, + void** elf_header) { + int obj_fd = open(obj_file.c_str(), O_RDONLY); + if (obj_fd < 0) { + fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + FDWrapper obj_fd_wrapper(obj_fd); + struct stat st; + if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { + fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + // Mapping it read-only is good enough. In any case, mapping it + // read-write confuses Valgrind's debuginfo acquire/discard + // heuristics, making it hard to profile the profiler. + void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0); + if (obj_base == MAP_FAILED) { + fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + map_wrapper->set(obj_base, st.st_size); + *elf_header = obj_base; + if (!IsValidElf(*elf_header)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); + return false; + } + return true; +} + +// Get the endianness of ELF_HEADER. If it's invalid, return false. 
+template +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, + bool* big_endian) { + if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { + *big_endian = false; + return true; + } + if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { + *big_endian = true; + return true; + } + + fprintf(stderr, "bad data encoding in ELF header: %d\n", + elf_header->e_ident[EI_DATA]); + return false; +} + +// +// LoadSymbolsInfo +// +// Holds the state between the two calls to LoadSymbols() in case it's necessary +// to follow the .gnu_debuglink section and load debug information from a +// different file. +// +template +class LoadSymbolsInfo { + public: + typedef typename ElfClass::Addr Addr; + + explicit LoadSymbolsInfo(const vector& dbg_dirs) + : debug_dirs_(dbg_dirs), has_loading_addr_(false) {} + + // Keeps track of which sections have been loaded so sections don't + // accidentally get loaded twice from two different files. + void LoadedSection(const string& section) { + if (loaded_sections_.count(section) == 0) { + loaded_sections_.insert(section); + } else { + fprintf(stderr, "Section %s has already been loaded.\n", section.c_str()); + } + } + + string debuglink_file() const { return debuglink_file_; } + + private: + const vector& debug_dirs_; // Directories in which to + // search for the debug ELF file. + + string debuglink_file_; // Full path to the debug ELF file. + + bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. + + set loaded_sections_; // Tracks the Loaded ELF sections + // between calls to LoadSymbols(). +}; + +// Find the preferred loading address of the binary. +template +typename ElfClass::Addr GetLoadingAddress( + const typename ElfClass::Phdr* program_headers, int nheader) { + typedef typename ElfClass::Phdr Phdr; + + // For non-PIC executables (e_type == ET_EXEC), the load address is + // the start address of the first PT_LOAD segment. (ELF requires + // the segments to be sorted by load address.) 
For PIC executables + // and dynamic libraries (e_type == ET_DYN), this address will + // normally be zero. + for (int i = 0; i < nheader; ++i) { + const Phdr& header = program_headers[i]; + if (header.p_type == PT_LOAD) return header.p_vaddr; + } + return 0; +} + +template +bool LoadSymbols(const string& obj_file, const bool big_endian, + const typename ElfClass::Ehdr* elf_header, + const bool read_gnu_debug_link, + LoadSymbolsInfo* info, SecMap* smap, void* rx_avma, + size_t rx_size, UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Phdr Phdr; + typedef typename ElfClass::Shdr Shdr; + + char buf[500]; + SprintfLiteral(buf, "LoadSymbols: BEGIN %s\n", obj_file.c_str()); + buf[sizeof(buf) - 1] = 0; + log(buf); + + // This is how the text bias is calculated. + // BEGIN CALCULATE BIAS + uintptr_t loading_addr = GetLoadingAddress( + GetOffset(elf_header, elf_header->e_phoff), + elf_header->e_phnum); + uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; + SprintfLiteral(buf, "LoadSymbols: rx_avma=%llx, text_bias=%llx", + (unsigned long long int)(uintptr_t)rx_avma, + (unsigned long long int)text_bias); + buf[sizeof(buf) - 1] = 0; + log(buf); + // END CALCULATE BIAS + + const Shdr* sections = + GetOffset(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset(elf_header, section_names->sh_offset); + const char* names_end = names + section_names->sh_size; + bool found_usable_info = false; + + // Dwarf Call Frame Information (CFI) is actually independent from + // the other DWARF debugging information, and can be used alone. + const Shdr* dwarf_cfi_section = + FindElfSectionByName(".debug_frame", SHT_PROGBITS, sections, + names, names_end, elf_header->e_shnum); + if (dwarf_cfi_section) { + // Ignore the return value of this function; even without call frame + // information, the other debugging information could be perfectly + // useful. 
+ info->LoadedSection(".debug_frame"); + bool result = LoadDwarfCFI(obj_file, elf_header, ".debug_frame", + dwarf_cfi_section, false, 0, 0, + big_endian, smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) log("LoadSymbols: read CFI from .debug_frame"); + } + + // Linux C++ exception handling information can also provide + // unwinding data. + const Shdr* eh_frame_section = + FindElfSectionByName(".eh_frame", SHT_PROGBITS, sections, names, + names_end, elf_header->e_shnum); +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + if (!eh_frame_section) { + // Possibly depending on which linker created libxul.so, on x86_64-linux + // and -android, .eh_frame may instead have the SHT_X86_64_UNWIND type. + eh_frame_section = + FindElfSectionByName(".eh_frame", SHT_X86_64_UNWIND, sections, + names, names_end, elf_header->e_shnum); + } +#endif + if (eh_frame_section) { + // Pointers in .eh_frame data may be relative to the base addresses of + // certain sections. Provide those sections if present. + const Shdr* got_section = FindElfSectionByName( + ".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); + const Shdr* text_section = FindElfSectionByName( + ".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); + info->LoadedSection(".eh_frame"); + // As above, ignore the return value of this function. + bool result = LoadDwarfCFI( + obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section, + text_section, big_endian, smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) log("LoadSymbols: read CFI from .eh_frame"); + } + + SprintfLiteral(buf, "LoadSymbols: END %s\n", obj_file.c_str()); + buf[sizeof(buf) - 1] = 0; + log(buf); + + return found_usable_info; +} + +// Return the breakpad symbol file identifier for the architecture of +// ELF_HEADER. 
+template +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { + typedef typename ElfClass::Half Half; + Half arch = elf_header->e_machine; + switch (arch) { + case EM_386: + return "x86"; + case EM_ARM: + return "arm"; + case EM_AARCH64: + return "arm64"; + case EM_MIPS: + return "mips"; + case EM_PPC64: + return "ppc64"; + case EM_PPC: + return "ppc"; + case EM_S390: + return "s390"; + case EM_SPARC: + return "sparc"; + case EM_SPARCV9: + return "sparcv9"; + case EM_X86_64: + return "x86_64"; + default: + return NULL; + } +} + +// Format the Elf file identifier in IDENTIFIER as a UUID with the +// dashes removed. +string FormatIdentifier(unsigned char identifier[16]) { + char identifier_str[40]; + lul::FileID::ConvertIdentifierToString(identifier, identifier_str, + sizeof(identifier_str)); + string id_no_dash; + for (int i = 0; identifier_str[i] != '\0'; ++i) + if (identifier_str[i] != '-') id_no_dash += identifier_str[i]; + // Add an extra "0" by the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but be consistent. + id_no_dash += '0'; + return id_no_dash; +} + +// Return the non-directory portion of FILENAME: the portion after the +// last slash, or the whole filename if there are no slashes. +string BaseFileName(const string& filename) { + // Lots of copies! basename's behavior is less than ideal. 
+ char* c_filename = strdup(filename.c_str()); + string base = basename(c_filename); + free(c_filename); + return base; +} + +template +bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, + const string& obj_filename, + const vector& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Ehdr Ehdr; + + unsigned char identifier[16]; + if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { + fprintf(stderr, "%s: unable to generate file identifier\n", + obj_filename.c_str()); + return false; + } + + const char* architecture = ElfArchitecture(elf_header); + if (!architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + obj_filename.c_str(), elf_header->e_machine); + return false; + } + + // Figure out what endianness this file is. + bool big_endian; + if (!ElfEndianness(elf_header, &big_endian)) return false; + + string name = BaseFileName(obj_filename); + string os = "Linux"; + string id = FormatIdentifier(identifier); + + LoadSymbolsInfo info(debug_dirs); + if (!LoadSymbols(obj_filename, big_endian, elf_header, + !debug_dirs.empty(), &info, smap, rx_avma, rx_size, + usu, log)) { + const string debuglink_file = info.debuglink_file(); + if (debuglink_file.empty()) return false; + + // Load debuglink ELF file. + fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); + MmapWrapper debug_map_wrapper; + Ehdr* debug_elf_header = NULL; + if (!LoadELF(debuglink_file, &debug_map_wrapper, + reinterpret_cast(&debug_elf_header))) + return false; + // Sanity checks to make sure everything matches up. 
+ const char* debug_architecture = + ElfArchitecture(debug_elf_header); + if (!debug_architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + debuglink_file.c_str(), debug_elf_header->e_machine); + return false; + } + if (strcmp(architecture, debug_architecture)) { + fprintf(stderr, + "%s with ELF machine architecture %s does not match " + "%s with ELF architecture %s\n", + debuglink_file.c_str(), debug_architecture, obj_filename.c_str(), + architecture); + return false; + } + + bool debug_big_endian; + if (!ElfEndianness(debug_elf_header, &debug_big_endian)) + return false; + if (debug_big_endian != big_endian) { + fprintf(stderr, "%s and %s does not match in endianness\n", + obj_filename.c_str(), debuglink_file.c_str()); + return false; + } + + if (!LoadSymbols(debuglink_file, debug_big_endian, + debug_elf_header, false, &info, smap, rx_avma, + rx_size, usu, log)) { + return false; + } + } + + return true; +} + +} // namespace + +namespace lul { + +bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, + const vector& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + if (!IsValidElf(obj_file)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); + return false; + } + + int elfclass = ElfClass(obj_file); + if (elfclass == ELFCLASS32) { + return ReadSymbolDataElfClass( + reinterpret_cast(obj_file), obj_filename, debug_dirs, + smap, rx_avma, rx_size, usu, log); + } + if (elfclass == ELFCLASS64) { + return ReadSymbolDataElfClass( + reinterpret_cast(obj_file), obj_filename, debug_dirs, + smap, rx_avma, rx_size, usu, log); + } + + return false; +} + +bool ReadSymbolData(const string& obj_file, const vector& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, void (*log)(const char*)) { + MmapWrapper map_wrapper; + void* elf_header = NULL; + if (!LoadELF(obj_file, &map_wrapper, &elf_header)) 
return false; + + return ReadSymbolDataInternal(reinterpret_cast(elf_header), + obj_file, debug_dirs, smap, rx_avma, rx_size, + usu, log); +} + +namespace { + +template +void FindElfClassSection(const char* elf_base, const char* section_name, + typename ElfClass::Word section_type, + const void** section_start, int* section_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Shdr Shdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Shdr* sections = + GetOffset(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset(elf_header, section_names->sh_offset); + const char* names_end = names + section_names->sh_size; + + const Shdr* section = + FindElfSectionByName(section_name, section_type, sections, + names, names_end, elf_header->e_shnum); + + if (section != NULL && section->sh_size > 0) { + *section_start = elf_base + section->sh_offset; + *section_size = section->sh_size; + } +} + +template +void FindElfClassSegment(const char* elf_base, + typename ElfClass::Word segment_type, + const void** segment_start, int* segment_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Phdr Phdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Phdr* phdrs = + GetOffset(elf_header, elf_header->e_phoff); + + for (int i = 0; i < elf_header->e_phnum; ++i) { + if (phdrs[i].p_type == segment_type) { + *segment_start = elf_base + phdrs[i].p_offset; + *segment_size = phdrs[i].p_filesz; + return; + } + } +} + +} // namespace 
+ +bool IsValidElf(const void* elf_base) { + return strncmp(reinterpret_cast(elf_base), ELFMAG, SELFMAG) == 0; +} + +int ElfClass(const void* elf_base) { + const ElfW(Ehdr)* elf_header = reinterpret_cast(elf_base); + + return elf_header->e_ident[EI_CLASS]; +} + +bool FindElfSection(const void* elf_mapped_base, const char* section_name, + uint32_t section_type, const void** section_start, + int* section_size, int* elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + *section_start = NULL; + *section_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = static_cast(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSection(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSection(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } + + return false; +} + +bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type, + const void** segment_start, int* segment_size, + int* elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + *segment_start = NULL; + *segment_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = static_cast(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSegment(elf_base, segment_type, segment_start, + segment_size); + return *segment_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSegment(elf_base, segment_type, segment_start, + segment_size); + return *segment_start != NULL; + } + + return false; +} + +// (derived from) +// file_id.cc: Return a unique identifier for a file +// +// See file_id.h for documentation +// + +// 
ELF note name and desc are 32-bits word padded. +#define NOTE_PADDING(a) ((a + 3) & ~3) + +// These functions are also used inside the crashed process, so be safe +// and use the syscall/libc wrappers instead of direct syscalls or libc. + +template +static bool ElfClassBuildIDNoteIdentifier(const void* section, int length, + uint8_t identifier[kMDGUIDSize]) { + typedef typename ElfClass::Nhdr Nhdr; + + const void* section_end = reinterpret_cast(section) + length; + const Nhdr* note_header = reinterpret_cast(section); + while (reinterpret_cast(note_header) < section_end) { + if (note_header->n_type == NT_GNU_BUILD_ID) break; + note_header = reinterpret_cast( + reinterpret_cast(note_header) + sizeof(Nhdr) + + NOTE_PADDING(note_header->n_namesz) + + NOTE_PADDING(note_header->n_descsz)); + } + if (reinterpret_cast(note_header) >= section_end || + note_header->n_descsz == 0) { + return false; + } + + const char* build_id = reinterpret_cast(note_header) + + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); + // Copy as many bits of the build ID as will fit + // into the GUID space. + memset(identifier, 0, kMDGUIDSize); + memcpy(identifier, build_id, + std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); + + return true; +} + +// Attempt to locate a .note.gnu.build-id section in an ELF binary +// and copy as many bytes of it as will fit into |identifier|. 
+static bool FindElfBuildIDNote(const void* elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* note_section; + int note_size, elfclass; + if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)¬e_section, + ¬e_size, &elfclass) || + note_size == 0) && + (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size, &elfclass) || + note_size == 0)) { + return false; + } + + if (elfclass == ELFCLASS32) { + return ElfClassBuildIDNoteIdentifier(note_section, note_size, + identifier); + } else if (elfclass == ELFCLASS64) { + return ElfClassBuildIDNoteIdentifier(note_section, note_size, + identifier); + } + + return false; +} + +// Attempt to locate the .text section of an ELF binary and generate +// a simple hash by XORing the first page worth of bytes into |identifier|. +static bool HashElfTextSection(const void* elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* text_section; + int text_size; + if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, + (const void**)&text_section, &text_size, NULL) || + text_size == 0) { + return false; + } + + memset(identifier, 0, kMDGUIDSize); + const uint8_t* ptr = reinterpret_cast(text_section); + const uint8_t* ptr_end = ptr + std::min(text_size, 4096); + while (ptr < ptr_end) { + for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i]; + ptr += kMDGUIDSize; + } + return true; +} + +// static +bool FileID::ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]) { + // Look for a build id note first. + if (FindElfBuildIDNote(base, identifier)) return true; + + // Fall back on hashing the first page of the text section. + return HashElfTextSection(base, identifier); +} + +// static +void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length) { + uint8_t identifier_swapped[kMDGUIDSize]; + + // Endian-ness swap to match dump processor expectation. 
+ memcpy(identifier_swapped, identifier, kMDGUIDSize); + uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); + *data1 = htonl(*data1); + uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); + *data2 = htons(*data2); + uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); + *data3 = htons(*data3); + + // Emit whole bytes only: stop as soon as the next byte (two hex digits, plus + // a leading dash at the group boundaries) would leave no room for the + // terminating NUL, so nothing is ever written at or past |buffer_length|. + int buffer_idx = 0; + for (unsigned int idx = 0; idx < kMDGUIDSize; ++idx) { + const bool dash = (idx == 4 || idx == 6 || idx == 8 || idx == 10); + if (buffer_idx + (dash ? 3 : 2) >= buffer_length) break; + + int hi = (identifier_swapped[idx] >> 4) & 0x0F; + int lo = (identifier_swapped[idx]) & 0x0F; + + if (dash) buffer[buffer_idx++] = '-'; + + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; + } + + // NULL terminate (nothing can be written into a zero-length buffer) + if (buffer_length > 0) buffer[buffer_idx] = 0; +} + +} // namespace lul diff --git a/tools/profiler/lul/LulElfExt.h b/tools/profiler/lul/LulElfExt.h new file mode 100644 index 0000000000..73d9ff7f15 --- /dev/null +++ b/tools/profiler/lul/LulElfExt.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.h + +#ifndef LulElfExt_h +#define LulElfExt_h + +// These two functions are the external interface to the +// ELF/Dwarf/EXIDX reader. + +#include "LulMainInt.h" + +using lul::SecMap; + +namespace lul { + +class UniqueStringUniverse; + +// Find all the unwind information in OBJ_FILE, an ELF executable +// or shared library, and add it to SMAP. +bool ReadSymbolData(const std::string& obj_file, + const std::vector& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, UniqueStringUniverse* usu, + void (*log)(const char*)); + +// The same as ReadSymbolData, except that OBJ_FILE is assumed to +// point to a mapped-in image of OBJ_FILENAME. 
+bool ReadSymbolDataInternal(const uint8_t* obj_file, + const std::string& obj_filename, + const std::vector& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)); + +} // namespace lul + +#endif // LulElfExt_h diff --git a/tools/profiler/lul/LulElfInt.h b/tools/profiler/lul/LulElfInt.h new file mode 100644 index 0000000000..31ffba8ff0 --- /dev/null +++ b/tools/profiler/lul/LulElfInt.h @@ -0,0 +1,218 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/android/include/elf.h +// src/common/linux/elfutils.h +// src/common/linux/file_id.h +// src/common/linux/elfutils-inl.h + +#ifndef LulElfInt_h +#define LulElfInt_h + +// This header defines functions etc internal to the ELF reader. It +// should not be included outside of LulElf.cpp. + +#include +#include + +#include "mozilla/Assertions.h" + +#include "PlatformMacros.h" + +// (derived from) +// elfutils.h: Utilities for dealing with ELF files. +// +#include + +#if defined(GP_OS_android) + +// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h +// The Android headers don't always define this constant. +# ifndef EM_X86_64 +# define EM_X86_64 62 +# endif + +# ifndef EM_PPC64 +# define EM_PPC64 21 +# endif + +# ifndef EM_S390 +# define EM_S390 22 +# endif + +# ifndef NT_GNU_BUILD_ID +# define NT_GNU_BUILD_ID 3 +# endif + +# ifndef ElfW +# define ElfW(type) _ElfW(Elf, ELFSIZE, type) +# define _ElfW(e, w, t) _ElfW_1(e, w, _##t) +# define _ElfW_1(e, w, t) e##w##t +# endif + +#endif + +#if defined(GP_OS_freebsd) + +# ifndef ElfW +# define ElfW(type) Elf_##type +# endif + +#endif + +namespace lul { + +// Traits classes so consumers can write templatized code to deal +// with specific ELF bits. 
+struct ElfClass32 { + typedef Elf32_Addr Addr; + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Nhdr Nhdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Half Half; + typedef Elf32_Off Off; + typedef Elf32_Word Word; + static const int kClass = ELFCLASS32; + static const size_t kAddrSize = sizeof(Elf32_Addr); +}; + +struct ElfClass64 { + typedef Elf64_Addr Addr; + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Nhdr Nhdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Half Half; + typedef Elf64_Off Off; + typedef Elf64_Word Word; + static const int kClass = ELFCLASS64; + static const size_t kAddrSize = sizeof(Elf64_Addr); +}; + +bool IsValidElf(const void* elf_header); +int ElfClass(const void* elf_base); + +// Attempt to find a section named |section_name| of type |section_type| +// in the ELF binary data at |elf_mapped_base|. On success, returns true +// and sets |*section_start| to point to the start of the section data, +// and |*section_size| to the size of the section's data. On failure, +// returns false with |*section_start| set to NULL and |*section_size| +// set to 0. If |elfclass| +// is not NULL, set |*elfclass| to the ELF file class. +bool FindElfSection(const void* elf_mapped_base, const char* section_name, + uint32_t section_type, const void** section_start, + int* section_size, int* elfclass); + +// Internal helper method, exposed for convenience for callers +// that already have more info. +template +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, const char* section_names, + const char* names_end, int nsection); + +// Attempt to find the first segment of type |segment_type| in the ELF +// binary data at |elf_mapped_base|. On success, returns true and sets +// |*segment_start| to point to the start of the segment data, and +// |*segment_size| to the size of the segment's data. On failure, +// returns false with |*segment_start| set to NULL and |*segment_size| +// set to 0. If |elfclass| +// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type, + const void** segment_start, int* segment_size, + int* elfclass); + +// Convert an offset from an Elf header into a pointer to the mapped +// address in the current process. Takes an extra template parameter +// to specify the return type to avoid having to dynamic_cast the +// result. +template +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset); + +// (derived from) +// file_id.h: Return a unique identifier for a file +// + +static const size_t kMDGUIDSize = sizeof(MDGUID); + +class FileID { + public: + // Load the identifier for the elf file mapped into memory at |base| into + // |identifier|. Return false if the identifier could not be created for the + // file. + static bool ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]); + + // Convert the |identifier| data to a NULL terminated string. The string will + // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE). + // The |buffer| should be at least 37 bytes long to receive all of the data + // and termination. Shorter buffers will contain truncated data. 
+ static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length); +}; + +template +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset) { + return reinterpret_cast(reinterpret_cast(elf_header) + + offset); +} + +template +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, const char* section_names, + const char* names_end, int nsection) { + MOZ_ASSERT(name != NULL); + MOZ_ASSERT(sections != NULL); + MOZ_ASSERT(nsection > 0); + + int name_len = strlen(name); + if (name_len == 0) return NULL; + + for (int i = 0; i < nsection; ++i) { + const char* section_name = section_names + sections[i].sh_name; + if (sections[i].sh_type == section_type && + names_end - section_name >= name_len + 1 && + strcmp(name, section_name) == 0) { + return sections + i; + } + } + return NULL; +} + +} // namespace lul + +// And finally, the external interface, offered to LulMain.cpp +#include "LulElfExt.h" + +#endif // LulElfInt_h diff --git a/tools/profiler/lul/LulMain.cpp b/tools/profiler/lul/LulMain.cpp new file mode 100644 index 0000000000..7cf5508234 --- /dev/null +++ b/tools/profiler/lul/LulMain.cpp @@ -0,0 +1,2079 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "LulMain.h" + +#include +#include +#include +#include // write(), only for testing LUL + +#include // std::sort +#include +#include + +#include "GeckoProfiler.h" // for profiler_current_thread_id() +#include "LulCommonExt.h" +#include "LulElfExt.h" +#include "LulMainInt.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/Sprintf.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" + +// Set this to 1 for verbose logging +#define DEBUG_MAIN 0 + +namespace lul { + +using mozilla::CheckedInt; +using mozilla::DebugOnly; +using mozilla::MallocSizeOf; +using mozilla::Unused; +using std::pair; +using std::string; +using std::vector; + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT. +// Any such function -- and, hence, the transitive closure of those +// reachable from it -- must not do any dynamic memory allocation. +// Doing so risks deadlock. There is exactly one root function for +// the transitive closure: Lul::Unwind. 
+// +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +static const char* NameOf_DW_REG(int16_t aReg) { + switch (aReg) { + case DW_REG_CFA: + return "cfa"; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return "xbp"; + case DW_REG_INTEL_XSP: + return "xsp"; + case DW_REG_INTEL_XIP: + return "xip"; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return "r7"; + case DW_REG_ARM_R11: + return "r11"; + case DW_REG_ARM_R12: + return "r12"; + case DW_REG_ARM_R13: + return "r13"; + case DW_REG_ARM_R14: + return "r14"; + case DW_REG_ARM_R15: + return "r15"; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return "x29"; + case DW_REG_AARCH64_X30: + return "x30"; + case DW_REG_AARCH64_SP: + return "sp"; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return "sp"; + case DW_REG_MIPS_FP: + return "fp"; + case DW_REG_MIPS_PC: + return "pc"; +#else +# error "Unsupported arch" +#endif + default: + return "???"; + } +} + +string LExpr::ShowRule(const char* aNewReg) const { + char buf[64]; + string res = string(aNewReg) + "="; + switch (mHow) { + case UNKNOWN: + res += "Unknown"; + break; + case NODEREF: + SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case DEREF: + SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case PFXEXPR: + SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset); + res += buf; + break; + default: + res += "???"; + break; + } + return res; +} + +void RuleSet::Print(uintptr_t avma, uintptr_t len, + void (*aLog)(const char*)) const { + char buf[96]; + SprintfLiteral(buf, "[%llx .. 
%llx]: let ", (unsigned long long int)avma, + (unsigned long long int)(avma + len - 1)); + string res = string(buf); + res += mCfaExpr.ShowRule("cfa"); + res += " in"; + // For each reg we care about, print the recovery expression. +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + res += mXipExpr.ShowRule(" RA"); + res += mXspExpr.ShowRule(" SP"); + res += mXbpExpr.ShowRule(" BP"); +#elif defined(GP_ARCH_arm) + res += mR15expr.ShowRule(" R15"); + res += mR7expr.ShowRule(" R7"); + res += mR11expr.ShowRule(" R11"); + res += mR12expr.ShowRule(" R12"); + res += mR13expr.ShowRule(" R13"); + res += mR14expr.ShowRule(" R14"); +#elif defined(GP_ARCH_arm64) + res += mX29expr.ShowRule(" X29"); + res += mX30expr.ShowRule(" X30"); + res += mSPexpr.ShowRule(" SP"); +#elif defined(GP_ARCH_mips64) + res += mPCexpr.ShowRule(" PC"); + res += mSPexpr.ShowRule(" SP"); + res += mFPexpr.ShowRule(" FP"); +#else +# error "Unsupported arch" +#endif + aLog(res.c_str()); +} + +LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) { + switch (aRegno) { + case DW_REG_CFA: + return &mCfaExpr; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XIP: + return &mXipExpr; + case DW_REG_INTEL_XSP: + return &mXspExpr; + case DW_REG_INTEL_XBP: + return &mXbpExpr; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R15: + return &mR15expr; + case DW_REG_ARM_R14: + return &mR14expr; + case DW_REG_ARM_R13: + return &mR13expr; + case DW_REG_ARM_R12: + return &mR12expr; + case DW_REG_ARM_R11: + return &mR11expr; + case DW_REG_ARM_R7: + return &mR7expr; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return &mX29expr; + case DW_REG_AARCH64_X30: + return &mX30expr; + case DW_REG_AARCH64_SP: + return &mSPexpr; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return &mSPexpr; + case DW_REG_MIPS_FP: + return &mFPexpr; + case DW_REG_MIPS_PC: + return &mPCexpr; +#else +# error "Unknown arch" +#endif + default: + return nullptr; + } +} + +RuleSet::RuleSet() { + // All fields are 
of type LExpr and so are initialised by LExpr::LExpr(). +} + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// See header file LulMainInt.h for comments about invariants. + +SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen, + void (*aLog)(const char*)) + : mUsable(false), + mUniqifier(new mozilla::HashMap), + mLog(aLog) { + if (mapLen == 0) { + // Degenerate case. + mMapMinAVMA = 1; + mMapMaxAVMA = 0; + } else { + mMapMinAVMA = mapStartAVMA; + mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1; + } +} + +SecMap::~SecMap() { + mExtents.clear(); + mDictionary.clear(); + if (mUniqifier) { + mUniqifier->clear(); + mUniqifier = nullptr; + } +} + +// RUNS IN NO-MALLOC CONTEXT +// Returns the RuleSet of the Extent whose address range contains |ia|, or +// nullptr if no Extent in this SecMap brackets |ia|. +RuleSet* SecMap::FindRuleSet(uintptr_t ia) { + // Binary search mExtents to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. Note that this works correctly even when + // mExtents.size() == 0. + + // Can't do this until the array has been sorted and preened. + MOZ_ASSERT(mUsable); + + long int lo = 0; + long int hi = (long int)mExtents.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + Extent* mid_extent = &mExtents[mid]; + uintptr_t mid_offset = mid_extent->offset(); + uintptr_t mid_len = mid_extent->len(); + uintptr_t mid_minAddr = mMapMinAVMA + mid_offset; + uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + uint32_t mid_extent_dictIx = mid_extent->dictIx(); + // The index is applied to mDictionary, so that is the container whose + // bound has to be checked (not mExtents). + MOZ_RELEASE_ASSERT(mid_extent_dictIx < mDictionary.size()); + return &mDictionary[mid_extent_dictIx]; + } + // NOTREACHED +} + +// Add a RuleSet to the collection.
The rule is copied in. Calling +// this makes the map non-searchable. +void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) { + mUsable = false; + + // Zero length RuleSet? Meaningless, but ignore it anyway. + if (len == 0) { + return; + } + + // Ignore attempts to add RuleSets whose address range doesn't fall within + // the declared address range for the SecMap. Maybe we should print some + // kind of error message rather than silently ignoring them. + if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) { + return; + } + + // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte + // chunk of address space, the following must now hold. + MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF); + + // See if `mUniqifier` already has `rs`. If so set `dictIx` to the assigned + // dictionary index; if not, add `rs` to `mUniqifier` and assign a new + // dictionary index. This is the core of the RuleSet-de-duplication process. + uint32_t dictIx = 0; + mozilla::HashMap::AddPtr + p = mUniqifier->lookupForAdd(*rs); + if (!p) { + dictIx = mUniqifier->count(); + // If this ever fails, Extents::dictIx will need to be changed to be a + // type wider than the current uint16_t. + MOZ_RELEASE_ASSERT(dictIx < (1 << 16)); + // This returns `false` on OOM. We ignore the return value since we asked + // for it to use the InfallibleAllocPolicy. + DebugOnly addedOK = mUniqifier->add(p, *rs, dictIx); + MOZ_ASSERT(addedOK); + } else { + dictIx = p->value(); + } + + uint32_t offset = (uint32_t)(avma - mMapMinAVMA); + while (len > 0) { + // Because Extents::len is a uint16_t, we have to add multiple `mExtents` + // entries to cover the case where `len` is equal to or greater than 2^16. + // This happens only exceedingly rarely. In order to get more test + // coverage on what would otherwise be a very low probability (less than + // 0.0002%) corner case, we do this in steps of 4095. 
On libxul.so as of + // Sept 2020, this increases the number of `mExtents` entries by about + // 0.05%, hence has no meaningful effect on space use, but increases the + // use of this corner case, and hence its test coverage, by a factor of 250. + uint32_t this_step_len = (len > 4095) ? 4095 : len; + mExtents.emplace_back(offset, this_step_len, dictIx); + offset += this_step_len; + len -= this_step_len; + } +} + +// Add a PfxInstr to the vector of such instrs, and return the index +// in the vector. Calling this makes the map non-searchable. +uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) { + mUsable = false; + mPfxInstrs.push_back(pfxi); + return mPfxInstrs.size() - 1; +} + +// Prepare the map for searching, by sorting it, de-overlapping entries and +// removing any resulting zero-length entries. At the start of this routine, +// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero +// length, as a result of the checks in AddRuleSet(). +void SecMap::PrepareRuleSets() { + // At this point, the de-duped RuleSets are in `mUniqifier`, and + // `mDictionary` is empty. This method will, amongst other things, copy + // them into `mDictionary` in order of their assigned dictionary-index + // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`; + // after this method, it has no further use. + MOZ_RELEASE_ASSERT(mUniqifier); + MOZ_RELEASE_ASSERT(mDictionary.empty()); + + if (mExtents.empty()) { + mUniqifier->clear(); + mUniqifier = nullptr; + return; + } + + if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) { + // The map is empty. This should never happen. + mExtents.clear(); + mUniqifier->clear(); + mUniqifier = nullptr; + return; + } + MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA); + + // We must have at least one Extent, and as a consequence there must be at + // least one entry in the uniqifier. + MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty()); + +#ifdef DEBUG + // Check invariants on incoming Extents. 
+ for (size_t i = 0; i < mExtents.size(); ++i) { + Extent* ext = &mExtents[i]; + uint32_t len = ext->len(); + MOZ_ASSERT(len > 0); + MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */); + uint32_t offset = ext->offset(); + uintptr_t avma = mMapMinAVMA + (uintptr_t)offset; + // Upper bounds test. There's no lower bounds test because `offset` is a + // positive displacement from `mMapMinAVMA`, so a small underrun will + // manifest as `len` being close to 2^32. + MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA); + } +#endif + + // Sort by start addresses. + std::sort(mExtents.begin(), mExtents.end(), + [](const Extent& ext1, const Extent& ext2) { + return ext1.offset() < ext2.offset(); + }); + + // Iteratively truncate any overlaps and remove any zero length + // entries that might result, or that may have been present + // initially. Unless the input is seriously screwy, this is + // expected to iterate only once. + while (true) { + size_t i; + size_t n = mExtents.size(); + size_t nZeroLen = 0; + + if (n == 0) { + break; + } + + for (i = 1; i < n; ++i) { + Extent* prev = &mExtents[i - 1]; + Extent* here = &mExtents[i]; + MOZ_ASSERT(prev->offset() <= here->offset()); + if (prev->offset() + prev->len() > here->offset()) { + prev->setLen(here->offset() - prev->offset()); + } + if (prev->len() == 0) { + nZeroLen++; + } + } + + if (mExtents[n - 1].len() == 0) { + nZeroLen++; + } + + // At this point, the entries are in-order and non-overlapping. + // If none of them are zero-length, we are done. + if (nZeroLen == 0) { + break; + } + + // Slide back the entries to remove the zero length ones. + size_t j = 0; // The write-point. 
+ for (i = 0; i < n; ++i) { + if (mExtents[i].len() == 0) { + continue; + } + if (j != i) { + mExtents[j] = mExtents[i]; + } + ++j; + } + MOZ_ASSERT(i == n); + MOZ_ASSERT(nZeroLen <= n); + MOZ_ASSERT(j == n - nZeroLen); + while (nZeroLen > 0) { + mExtents.pop_back(); + nZeroLen--; + } + + MOZ_ASSERT(mExtents.size() == j); + } + + size_t nExtents = mExtents.size(); + +#ifdef DEBUG + // Do a final check on the extents: their address ranges must be + // ascending, non overlapping, non zero sized. + if (nExtents > 0) { + MOZ_ASSERT(mExtents[0].len() > 0); + for (size_t i = 1; i < nExtents; ++i) { + const Extent* prev = &mExtents[i - 1]; + const Extent* here = &mExtents[i]; + MOZ_ASSERT(prev->offset() < here->offset()); + MOZ_ASSERT(here->len() > 0); + MOZ_ASSERT(prev->offset() + prev->len() <= here->offset()); + } + } +#endif + + // Create the final dictionary by enumerating the uniqifier. + size_t nUniques = mUniqifier->count(); + + RuleSet dummy; + mozilla::PodZero(&dummy); + + mDictionary.reserve(nUniques); + for (size_t i = 0; i < nUniques; i++) { + mDictionary.push_back(dummy); + } + + for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) { + MOZ_RELEASE_ASSERT(iter.get().value() < nUniques); + mDictionary[iter.get().value()] = iter.get().key(); + } + + mUniqifier = nullptr; + + char buf[150]; + SprintfLiteral( + buf, + "PrepareRuleSets: %lu extents, %lu rulesets, " + "avma min/max 0x%llx, 0x%llx\n", + (unsigned long int)nExtents, (unsigned long int)mDictionary.size(), + (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Is now usable for binary search. 
+ mUsable = true; + +#if 0 + mLog("\nRulesets after preening\n"); + for (size_t i = 0; i < nExtents; ++i) { + const Extent* extent = &mExtents[i]; + uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset(); + mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog); + mLog("\n"); + } + mLog("\n"); +#endif +} + +bool SecMap::IsEmpty() { return mExtents.empty(); } + +size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that these calls would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mPfxInstrs.data()); + + if (mUniqifier) { + n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf); + } + n += aMallocSizeOf(mDictionary.data()); + n += aMallocSizeOf(mExtents.data()); + + return n; +} + +//////////////////////////////////////////////////////////////// +// SegArray // +//////////////////////////////////////////////////////////////// + +// A SegArray holds a set of address ranges that together exactly +// cover an address range, with no overlaps or holes. Each range has +// an associated value, which in this case has been specialised to be +// a simple boolean. The representation is kept to minimal canonical +// form in which adjacent ranges with the same associated value are +// merged together. Each range is represented by a |struct Seg|. +// +// SegArrays are used to keep track of which parts of the address +// space are known to contain instructions. 
+class SegArray { + public: + void add(uintptr_t lo, uintptr_t hi, bool val) { + if (lo > hi) { + return; + } + split_at(lo); + if (hi < UINTPTR_MAX) { + split_at(hi + 1); + } + std::vector::size_type iLo, iHi, i; + iLo = find(lo); + iHi = find(hi); + for (i = iLo; i <= iHi; ++i) { + mSegs[i].val = val; + } + preen(); + } + + // RUNS IN NO-MALLOC CONTEXT + bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min, + /*OUT*/ uintptr_t* rx_max, uintptr_t addr) { + std::vector::size_type i = find(addr); + if (!mSegs[i].val) { + return false; + } + *rx_min = mSegs[i].lo; + *rx_max = mSegs[i].hi; + return true; + } + + SegArray() { + Seg s(0, UINTPTR_MAX, false); + mSegs.push_back(s); + } + + private: + struct Seg { + Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {} + uintptr_t lo; + uintptr_t hi; + bool val; + }; + + void preen() { + for (std::vector::iterator iter = mSegs.begin(); + iter < mSegs.end() - 1; ++iter) { + if (iter[0].val != iter[1].val) { + continue; + } + iter[0].hi = iter[1].hi; + mSegs.erase(iter + 1); + // Back up one, so as not to miss an opportunity to merge + // with the entry after this one. + --iter; + } + } + + // RUNS IN NO-MALLOC CONTEXT + std::vector::size_type find(uintptr_t a) { + long int lo = 0; + long int hi = (long int)mSegs.size(); + while (true) { + // The unsearched space is lo .. hi inclusive. + if (lo > hi) { + // Not found. This can't happen. 
+ return (std::vector::size_type)(-1); + } + long int mid = lo + ((hi - lo) / 2); + uintptr_t mid_lo = mSegs[mid].lo; + uintptr_t mid_hi = mSegs[mid].hi; + if (a < mid_lo) { + hi = mid - 1; + continue; + } + if (a > mid_hi) { + lo = mid + 1; + continue; + } + return (std::vector::size_type)mid; + } + } + + void split_at(uintptr_t a) { + std::vector::size_type i = find(a); + if (mSegs[i].lo == a) { + return; + } + mSegs.insert(mSegs.begin() + i + 1, mSegs[i]); + mSegs[i].hi = a - 1; + mSegs[i + 1].lo = a; + } + + void show() { + printf("<< %d entries:\n", (int)mSegs.size()); + for (std::vector::iterator iter = mSegs.begin(); iter < mSegs.end(); + ++iter) { + printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo, + (unsigned long long int)(*iter).hi, + (*iter).val ? "true" : "false"); + } + printf(">>\n"); + } + + std::vector mSegs; +}; + +//////////////////////////////////////////////////////////////// +// PriMap // +//////////////////////////////////////////////////////////////// + +class PriMap { + public: + explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {} + + // RUNS IN NO-MALLOC CONTEXT + pair*> Lookup(uintptr_t ia) { + SecMap* sm = FindSecMap(ia); + return pair*>( + sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr); + } + + // Add a secondary map. No overlaps allowed w.r.t. existing + // secondary maps. + void AddSecMap(mozilla::UniquePtr&& aSecMap) { + // We can't add an empty SecMap to the PriMap. But that's OK + // since we'd never be able to find anything in it anyway. + if (aSecMap->IsEmpty()) { + return; + } + + // Iterate through the SecMaps and find the right place for this + // one. At the same time, ensure that the in-order + // non-overlapping invariant is preserved (and, generally, holds). + // FIXME: this gives a cost that is O(N^2) in the total number of + // shared objects in the system. ToDo: better. 
+ MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA); + + size_t num_secMaps = mSecMaps.size(); + uintptr_t i; + for (i = 0; i < num_secMaps; ++i) { + mozilla::UniquePtr& sm_i = mSecMaps[i]; + MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA); + if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) { + // |aSecMap| needs to be inserted immediately before mSecMaps[i]. + break; + } + } + MOZ_ASSERT(i <= num_secMaps); + if (i == num_secMaps) { + // It goes at the end. + mSecMaps.push_back(std::move(aSecMap)); + } else { + std::vector>::iterator iter = + mSecMaps.begin() + i; + mSecMaps.insert(iter, std::move(aSecMap)); + } + char buf[100]; + SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n", + (int)mSecMaps.size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + + // Remove and delete any SecMaps in the mapping, that intersect + // with the specified address range. + void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) { + MOZ_ASSERT(avma_min <= avma_max); + size_t num_secMaps = mSecMaps.size(); + if (num_secMaps > 0) { + intptr_t i; + // Iterate from end to start over the vector, so as to ensure + // that the special case where |avma_min| and |avma_max| denote + // the entire address space, can be completed in time proportional + // to the number of elements in the map. + for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) { + mozilla::UniquePtr& sm_i = mSecMaps[i]; + if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) { + // There's no overlap. Move on. + continue; + } + // We need to remove mSecMaps[i] and slide all those above it + // downwards to cover the hole. + mSecMaps.erase(mSecMaps.begin() + i); + } + } + } + + // Return the number of currently contained SecMaps. 
+ size_t CountSecMaps() { return mSecMaps.size(); } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that this call would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mSecMaps.data()); + + for (size_t i = 0; i < mSecMaps.size(); i++) { + n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf); + } + + return n; + } + + private: + // RUNS IN NO-MALLOC CONTEXT + SecMap* FindSecMap(uintptr_t ia) { + // Binary search mSecMaps to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. + long int lo = 0; + long int hi = (long int)mSecMaps.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + mozilla::UniquePtr& mid_secMap = mSecMaps[mid]; + uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA; + uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_secMap.get(); + } + // NOTREACHED + } + + private: + // sorted array of per-object ranges, non overlapping, non empty + std::vector> mSecMaps; + + // a logging sink, for debugging. 
+ void (*mLog)(const char*); +}; + +//////////////////////////////////////////////////////////////// +// LUL // +//////////////////////////////////////////////////////////////// + +#define LUL_LOG(_str) \ + do { \ + char buf[200]; \ + SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \ + uint64_t(profiler_current_process_id().ToNumber()), \ + uint64_t(profiler_current_thread_id().ToNumber()), this, \ + (_str)); \ + buf[sizeof(buf) - 1] = 0; \ + mLog(buf); \ + } while (0) + +LUL::LUL(void (*aLog)(const char*)) + : mLog(aLog), + mAdminMode(true), + mAdminThreadId(profiler_current_thread_id()), + mPriMap(new PriMap(aLog)), + mSegArray(new SegArray()), + mUSU(new UniqueStringUniverse()) { + LUL_LOG("LUL::LUL: Created object"); +} + +LUL::~LUL() { + LUL_LOG("LUL::~LUL: Destroyed object"); + delete mPriMap; + delete mSegArray; + mLog = nullptr; + delete mUSU; +} + +void LUL::MaybeShowStats() { + // This is racey in the sense that it can't guarantee that + // n_new == n_new_Context + n_new_CFI + n_new_Scanned + // if it should happen that mStats is updated by some other thread + // in between computation of n_new and n_new_{Context,CFI,FP}. + // But it's just stats printing, so we don't really care. 
+ uint32_t n_new = mStats - mStatsPrevious; + if (n_new >= 5000) { + uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext; + uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI; + uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP; + mStatsPrevious = mStats; + char buf[200]; + SprintfLiteral(buf, + "LUL frame stats: TOTAL %5u" + " CTX %4u CFI %4u FP %4u", + n_new, n_new_Context, n_new_CFI, n_new_FP); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } +} + +size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + n += mPriMap->SizeOfIncludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mSegArray + // - mUSU + + return n; +} + +void LUL::EnableUnwinding() { + LUL_LOG("LUL::EnableUnwinding"); + // Don't assert for Admin mode here. That is, tolerate a call here + // if we are already in Unwinding mode. + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mAdminMode = false; +} + +void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, + const void* aMappedImage) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyMap %llx %llu %s\n", + (unsigned long long int)aRXavma, (unsigned long long int)aSize, + aFileName); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // We can't have a SecMap covering more than 2^32-1 bytes of address space. + // See the definition of SecMap for why. Rather than crash the system, just + // limit the SecMap's size accordingly. This case is never actually + // expected to happen. + if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) { + aSize = (uintptr_t)0xFFFFFFFF; + } + MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Here's a new mapping, for this object. 
+ mozilla::UniquePtr smap = + mozilla::MakeUnique(aRXavma, (uint32_t)aSize, mLog); + + // Read CFI or EXIDX unwind data into |smap|. + if (!aMappedImage) { + (void)lul::ReadSymbolData(string(aFileName), std::vector(), + smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } else { + (void)lul::ReadSymbolDataInternal( + (const uint8_t*)aMappedImage, string(aFileName), + std::vector(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } + + mLog("NotifyMap .. preparing entries\n"); + + smap->PrepareRuleSets(); + + SprintfLiteral(buf, "NotifyMap got %lld entries\n", + (long long int)smap->Size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Add it to the primary map (the top level set of mapped objects). + mPriMap->AddSecMap(std::move(smap)); + + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n", + (unsigned long long int)aRXavma, + (unsigned long long int)aSize); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. 
+ mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[100]; + SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n", + (unsigned long long int)aRXavmaMin, + (unsigned long long int)aRXavmaMax); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + MOZ_ASSERT(aRXavmaMin <= aRXavmaMax); + + // Remove from the primary map, any secondary maps that intersect + // with the address range. Also delete the secondary maps. + mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax); + + // Tell the segment array that the address range no longer + // contains valid code. + mSegArray->add(aRXavmaMin, aRXavmaMax, false); + + SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n", + (int)mPriMap->CountSecMaps()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +} + +size_t LUL::CountMappings() { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + return mPriMap->CountSecMaps(); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) { + if (!aAddr.Valid()) { + return TaggedUWord(); + } + + // Lower limit check. |aAddr.Value()| is the lowest requested address + // and |aStackImg->mStartAvma| is the lowest address we actually have, + // so the comparison is straightforward. + if (aAddr.Value() < aStackImg->mStartAvma) { + return TaggedUWord(); + } + + // Upper limit check. We must compute the highest requested address + // and the highest address we actually have, but being careful to + // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the + // 3/7 values below that, then we will get overflow. See bug #1245477. 
+ typedef CheckedInt CheckedUWord; + CheckedUWord highest_requested_plus_one = + CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t)); + CheckedUWord highest_available_plus_one = + CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen); + if (!highest_requested_plus_one.isValid() // overflow? + || !highest_available_plus_one.isValid() // overflow? + || (highest_requested_plus_one.value() > + highest_available_plus_one.value())) { // in range? + return TaggedUWord(); + } + + return TaggedUWord( + *(uintptr_t*)(&aStackImg + ->mContents[aAddr.Value() - aStackImg->mStartAvma])); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs, + TaggedUWord aCFA) { + switch (aReg) { + case DW_REG_CFA: + return aCFA; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return aOldRegs->xbp; + case DW_REG_INTEL_XSP: + return aOldRegs->xsp; + case DW_REG_INTEL_XIP: + return aOldRegs->xip; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return aOldRegs->r7; + case DW_REG_ARM_R11: + return aOldRegs->r11; + case DW_REG_ARM_R12: + return aOldRegs->r12; + case DW_REG_ARM_R13: + return aOldRegs->r13; + case DW_REG_ARM_R14: + return aOldRegs->r14; + case DW_REG_ARM_R15: + return aOldRegs->r15; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return aOldRegs->x29; + case DW_REG_AARCH64_X30: + return aOldRegs->x30; + case DW_REG_AARCH64_SP: + return aOldRegs->sp; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return aOldRegs->sp; + case DW_REG_MIPS_FP: + return aOldRegs->fp; + case DW_REG_MIPS_PC: + return aOldRegs->pc; +#else +# error "Unsupported arch" +#endif + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +// See prototype for comment. 
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector& aPfxInstrs) { + // A small evaluation stack, and a stack pointer, which points to + // the highest numbered in-use element. + const int N_STACK = 10; + TaggedUWord stack[N_STACK]; + int stackPointer = -1; + for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord(); + +#define PUSH(_tuw) \ + do { \ + if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \ + stack[++stackPointer] = (_tuw); \ + } while (0) + +#define POP(_lval) \ + do { \ + if (stackPointer < 0) goto fail; /* underflow */ \ + _lval = stack[stackPointer--]; \ + } while (0) + + // Cursor in the instruction sequence. + size_t curr = start + 1; + + // Check the start point is sane. + size_t nInstrs = aPfxInstrs.size(); + if (start < 0 || (size_t)start >= nInstrs) goto fail; + + { + // The instruction sequence must start with PX_Start. If not, + // something is seriously wrong. + PfxInstr first = aPfxInstrs[start]; + if (first.mOpcode != PX_Start) goto fail; + + // Push the CFA on the stack to start with (or not), as required by + // the original DW_OP_*expression* CFI. + if (first.mOperand != 0) PUSH(aCFA); + } + + while (true) { + if (curr >= nInstrs) goto fail; // ran off the end of the sequence + + PfxInstr pfxi = aPfxInstrs[curr++]; + if (pfxi.mOpcode == PX_End) break; // we're done + + switch (pfxi.mOpcode) { + case PX_Start: + // This should appear only at the start of the sequence. + goto fail; + case PX_End: + // We just took care of that, so we shouldn't see it again. 
+ MOZ_ASSERT(0); + goto fail; + case PX_SImm32: + PUSH(TaggedUWord((intptr_t)pfxi.mOperand)); + break; + case PX_DwReg: { + DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand; + MOZ_ASSERT(reg != DW_REG_CFA); + PUSH(EvaluateReg(reg, aOldRegs, aCFA)); + break; + } + case PX_Deref: { + TaggedUWord addr; + POP(addr); + PUSH(DerefTUW(addr, aStackImg)); + break; + } + case PX_Add: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y + x); + break; + } + case PX_Sub: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y - x); + break; + } + case PX_And: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y & x); + break; + } + case PX_Or: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y | x); + break; + } + case PX_CmpGES: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y.CmpGEs(x)); + break; + } + case PX_Shl: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y << x); + break; + } + default: + MOZ_ASSERT(0); + goto fail; + } + } // while (true) + + // Evaluation finished. The top value on the stack is the result. 
+ if (stackPointer >= 0) { + return stack[stackPointer]; + } + // Else fall through + +fail: + return TaggedUWord(); + +#undef PUSH +#undef POP +} + +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA, + const StackImage* aStackImg, + const vector* aPfxInstrs) const { + switch (mHow) { + case UNKNOWN: + return TaggedUWord(); + case NODEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return tuw; + } + case DEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return DerefTUW(tuw, aStackImg); + } + case PFXEXPR: { + MOZ_ASSERT(aPfxInstrs); + if (!aPfxInstrs) { + return TaggedUWord(); + } + return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs); + } + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg, + const RuleSet* aRS, const vector* aPfxInstrs) { + // Take a copy of regs, since we'll need to refer to the old values + // whilst computing the new ones. + UnwindRegs old_regs = *aRegs; + + // Mark all the current register values as invalid, so that the + // caller can see, on our return, which ones have been computed + // anew. If we don't even manage to compute a new PC value, then + // the caller will have to abandon the unwind. 
+ // FIXME: Create and use instead: aRegs->SetAllInvalid(); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = TaggedUWord(); + aRegs->xsp = TaggedUWord(); + aRegs->xip = TaggedUWord(); +#elif defined(GP_ARCH_arm) + aRegs->r7 = TaggedUWord(); + aRegs->r11 = TaggedUWord(); + aRegs->r12 = TaggedUWord(); + aRegs->r13 = TaggedUWord(); + aRegs->r14 = TaggedUWord(); + aRegs->r15 = TaggedUWord(); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = TaggedUWord(); + aRegs->x30 = TaggedUWord(); + aRegs->sp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#elif defined(GP_ARCH_mips64) + aRegs->sp = TaggedUWord(); + aRegs->fp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#else +# error "Unsupported arch" +#endif + + // This is generally useful. + const TaggedUWord inval = TaggedUWord(); + + // First, compute the CFA. + TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/, + aStackImg, aPfxInstrs); + + // If we didn't manage to compute the CFA, well .. that's ungood, + // but keep going anyway. It'll be OK provided none of the register + // value rules mention the CFA. In any case, compute the new values + // for each register that we're tracking. 
+ +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = + aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xsp = + aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xip = + aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm) + aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r11 = + aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r12 = + aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r13 = + aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r14 = + aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r15 = + aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = + aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->x30 = + aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_mips64) + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#else +# error "Unsupported arch" +#endif + + // We're done. Any regs for which we didn't manage to compute a + // new value will now be marked as invalid. 
+} + +// RUNS IN NO-MALLOC CONTEXT +void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs, + /*OUT*/ uintptr_t* aFrameSPs, + /*OUT*/ size_t* aFramesUsed, + /*OUT*/ size_t* aFramePointerFramesAcquired, + size_t aFramesAvail, UnwindRegs* aStartRegs, + StackImage* aStackImg) { + MOZ_RELEASE_ASSERT(!mAdminMode); + + ///////////////////////////////////////////////////////// + // BEGIN UNWIND + + *aFramesUsed = 0; + + UnwindRegs regs = *aStartRegs; + TaggedUWord last_valid_sp = TaggedUWord(); + + while (true) { + if (DEBUG_MAIN) { + char buf[300]; + mLog("\n"); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + SprintfLiteral( + buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n", + (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(), + (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(), + (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm) + SprintfLiteral( + buf, + "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx" + " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n", + (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(), + (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(), + (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(), + (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(), + (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(), + (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm64) + SprintfLiteral( + buf, + "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx" + " sp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(), + (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_mips64) + SprintfLiteral( + buf, 
"LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(), + (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#else +# error "Unsupported arch" +#endif + } + +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + TaggedUWord ia = regs.xip; + TaggedUWord sp = regs.xsp; +#elif defined(GP_ARCH_arm) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14); + TaggedUWord sp = regs.r13; +#elif defined(GP_ARCH_arm64) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30); + TaggedUWord sp = regs.sp; +#elif defined(GP_ARCH_mips64) + TaggedUWord ia = regs.pc; + TaggedUWord sp = regs.sp; +#else +# error "Unsupported arch" +#endif + + if (*aFramesUsed >= aFramesAvail) { + break; + } + + // If we don't have a valid value for the PC, give up. + if (!ia.Valid()) { + break; + } + + // If this is the innermost frame, record the SP value, which + // presumably is valid. If this isn't the innermost frame, and we + // have a valid SP value, check that its SP value isn't less that + // the one we've seen so far, so as to catch potential SP value + // cycles. + if (*aFramesUsed == 0) { + last_valid_sp = sp; + } else { + MOZ_ASSERT(last_valid_sp.Valid()); + if (sp.Valid()) { + if (sp.Value() < last_valid_sp.Value()) { + // Hmm, SP going in the wrong direction. Let's stop. + break; + } + // Remember where we got to. + last_valid_sp = sp; + } + } + + aFramePCs[*aFramesUsed] = ia.Value(); + aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0; + (*aFramesUsed)++; + + // Find the RuleSet for the current IA, if any. This will also + // query the backing (secondary) maps if it isn't found in the + // thread-local cache. + + // If this isn't the innermost frame, back up into the calling insn. 
+ if (*aFramesUsed > 1) { + ia = ia + TaggedUWord((uintptr_t)(-1)); + } + + pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs = + mPriMap->Lookup(ia.Value()); + const RuleSet* ruleset = ruleset_and_pfxinstrs.first; + const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second; + + if (DEBUG_MAIN) { + char buf[100]; + SprintfLiteral(buf, "ruleset for 0x%llx = %p\n", + (unsigned long long int)ia.Value(), ruleset); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + +#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + ///////////////////////////////////////////// + //// + // On 32 bit x86-linux, syscalls are often done via the VDSO + // function __kernel_vsyscall, which doesn't have a corresponding + // object that we can read debuginfo from. That effectively kills + // off all stack traces for threads blocked in syscalls. Hence + // special-case by looking at the code surrounding the program + // counter. + // + // 0xf7757420 <__kernel_vsyscall+0>: push %ecx + // 0xf7757421 <__kernel_vsyscall+1>: push %edx + // 0xf7757422 <__kernel_vsyscall+2>: push %ebp + // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp + // 0xf7757425 <__kernel_vsyscall+5>: sysenter + // 0xf7757427 <__kernel_vsyscall+7>: nop + // 0xf7757428 <__kernel_vsyscall+8>: nop + // 0xf7757429 <__kernel_vsyscall+9>: nop + // 0xf775742a <__kernel_vsyscall+10>: nop + // 0xf775742b <__kernel_vsyscall+11>: nop + // 0xf775742c <__kernel_vsyscall+12>: nop + // 0xf775742d <__kernel_vsyscall+13>: nop + // 0xf775742e <__kernel_vsyscall+14>: int $0x80 + // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp + // 0xf7757431 <__kernel_vsyscall+17>: pop %edx + // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx + // 0xf7757433 <__kernel_vsyscall+19>: ret + // + // In cases where the sampled thread is blocked in a syscall, its + // program counter will point at "pop %ebp". 
Hence we look for + // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and + // the corresponding register-recovery actions are: + // new_ebp = *(old_esp + 0) + // new eip = *(old_esp + 12) + // new_esp = old_esp + 16 + // + // It may also be the case that the program counter points two + // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in + // the case where the syscall has been restarted but the thread + // hasn't been rescheduled. The code below doesn't handle that; + // it could easily be made to. + // + if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) { + uintptr_t insns_min, insns_max; + uintptr_t eip = ia.Value(); + bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip); + if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) { + uint8_t* eipC = (uint8_t*)eip; + if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D && + eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) { + TaggedUWord sp_plus_0 = sp; + TaggedUWord sp_plus_12 = sp; + TaggedUWord sp_plus_16 = sp; + sp_plus_12 = sp_plus_12 + TaggedUWord(12); + sp_plus_16 = sp_plus_16 + TaggedUWord(16); + TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg); + TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg); + TaggedUWord new_esp = sp_plus_16; + if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) { + regs.xbp = new_ebp; + regs.xip = new_eip; + regs.xsp = new_esp; + continue; + } + } + } + } + //// + ///////////////////////////////////////////// +#endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + + // So, do we have a ruleset for this address? If so, use it now. + if (ruleset) { + if (DEBUG_MAIN) { + ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog); + mLog("\n"); + } + // Use the RuleSet to compute the registers for the previous + // frame. |regs| is modified in-place. 
+ UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs); + continue; + } + +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_amd64_freebsd) + // There's no RuleSet for the specified address. On amd64/x86_linux, see if + // it's possible to recover the caller's frame by using the frame pointer. + + // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image), + // and assume the following layout: + // + // <--- new_SP + // +----------+ + // | new_IP | (return address) + // +----------+ + // | new_BP | <--- old_BP + // +----------+ + // | .... | + // | .... | + // | .... | + // +----------+ <---- old_SP (arbitrary, but must be <= old_BP) + + const size_t wordSzB = sizeof(uintptr_t); + TaggedUWord old_xsp = regs.xsp; + + // points at new_BP ? + TaggedUWord old_xbp = regs.xbp; + // points at new_IP ? + TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB); + // is the new_SP ? + TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB); + + if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() && + old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) { + // We don't need to do any range, alignment or validity checks for + // addresses passed to DerefTUW, since that performs them itself, and + // returns an invalid value on failure. Any such value will poison + // subsequent uses, and we do a final check for validity before putting + // the computed values into |regs|. 
+ TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg); + if (new_xbp.Valid() && new_xbp.IsAligned() && + old_xbp.Value() < new_xbp.Value()) { + TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg); + TaggedUWord new_xsp = old_xbp_plus2; + if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) { + regs.xbp = new_xbp; + regs.xip = new_xip; + regs.xsp = new_xsp; + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#elif defined(GP_ARCH_arm64) + // Here is an example of generated code for prologue and epilogue.. + // + // stp x29, x30, [sp, #-16]! + // mov x29, sp + // ... + // ldp x29, x30, [sp], #16 + // ret + // + // Next is another example of generated code. + // + // stp x20, x19, [sp, #-32]! + // stp x29, x30, [sp, #16] + // add x29, sp, #0x10 + // ... + // ldp x29, x30, [sp, #16] + // ldp x20, x19, [sp], #32 + // ret + // + // Previous x29 and x30 register are stored in the address of x29 register. + // But since sp register value depends on local variables, we cannot compute + // previous sp register from current sp/fp/lr register and there is no + // regular rule for sp register in prologue. But since return address is lr + // register, if x29 is valid, we will get return address without sp + // register. + // + // So we assume the following layout that if no rule set. x29 is frame + // pointer, so we will be able to compute x29 and x30 . + // + // +----------+ <--- new_sp (cannot compute) + // | .... | + // +----------+ + // | new_lr | (return address) + // +----------+ + // | new_fp | <--- old_fp + // +----------+ + // | .... | + // | .... 
| + // +----------+ <---- old_sp (arbitrary, but unused) + + TaggedUWord old_fp = regs.x29; + if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() && + last_valid_sp.Value() <= old_fp.Value()) { + TaggedUWord new_fp = DerefTUW(old_fp, aStackImg); + if (new_fp.Valid() && new_fp.IsAligned() && + old_fp.Value() < new_fp.Value()) { + TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8); + TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg); + if (new_lr.Valid()) { + regs.x29 = new_fp; + regs.x30 = new_lr; + // When using frame pointer to walk stack, we cannot compute sp + // register since we cannot compute sp register from fp/lr/sp + // register, and there is no regular rule to compute previous sp + // register. So mark as invalid. + regs.sp = TaggedUWord(); + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#endif // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || + // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || + // defined(GP_PLAT_amd64_freebsd) + + // We failed to recover a frame either using CFI or FP chasing, and we + // have no other ways to recover the frame. So we have to give up. + break; + + } // top level unwind loop + + // END UNWIND + ///////////////////////////////////////////////////////// +} + +//////////////////////////////////////////////////////////////// +// LUL Unit Testing // +//////////////////////////////////////////////////////////////// + +static const int LUL_UNIT_TEST_STACK_SIZE = 32768; + +#if defined(GP_ARCH_mips64) +static __attribute__((noinline)) unsigned long __getpc(void) { + unsigned long rtaddr; + __asm__ volatile("move %0, $31" : "=r"(rtaddr)); + return rtaddr; +} +#endif + +// This function is innermost in the test call sequence. It uses LUL +// to unwind, and compares the result with the sequence specified in +// the director string. These need to agree in order for the test to +// pass. 
In order not to screw up the results, this function needs +// to have a not-very big stack frame, since we're only presenting +// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and +// that chunk unavoidably includes the frame for this function. +// +// This function must not be inlined into its callers. Doing so will +// cause the expected-vs-actual backtrace consistency checking to +// fail. Prints summary results to |aLUL|'s logging sink and also +// returns a boolean indicating whether or not the test failed. +static __attribute__((noinline)) bool GetAndCheckStackTrace( + LUL* aLUL, const char* dstring) { + // Get hold of the current unwind-start registers. + UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); +#if defined(GP_ARCH_amd64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "leaq 0(%%rip), %%r15" + "\n\t" + "movq %%r15, 0(%0)" + "\n\t" + "movq %%rsp, 8(%0)" + "\n\t" + "movq %%rbp, 16(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "r15"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 128; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 12); + __asm__ __volatile__( + ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/ + "\n\t" + "popl %%edi" + "\n\t" + "movl %%edi, 0(%0)" + "\n\t" + "movl %%esp, 4(%0)" + "\n\t" + "movl %%ebp, 8(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "edi"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + volatile uintptr_t block[6]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "mov r0, 
r15" + "\n\t" + "str r0, [%0, #0]" + "\n\t" + "str r14, [%0, #4]" + "\n\t" + "str r13, [%0, #8]" + "\n\t" + "str r12, [%0, #12]" + "\n\t" + "str r11, [%0, #16]" + "\n\t" + "str r7, [%0, #20]" + "\n" + : + : "r"(&block[0]) + : "memory", "r0"); + startRegs.r15 = TaggedUWord(block[0]); + startRegs.r14 = TaggedUWord(block[1]); + startRegs.r13 = TaggedUWord(block[2]); + startRegs.r12 = TaggedUWord(block[3]); + startRegs.r11 = TaggedUWord(block[4]); + startRegs.r7 = TaggedUWord(block[5]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_arm64) + volatile uintptr_t block[4]; + MOZ_ASSERT(sizeof(block) == 32); + __asm__ __volatile__( + "adr x0, . \n\t" + "str x0, [%0, #0] \n\t" + "str x29, [%0, #8] \n\t" + "str x30, [%0, #16] \n\t" + "mov x0, sp \n\t" + "str x0, [%0, #24] \n\t" + : + : "r"(&block[0]) + : "memory", "x0"); + startRegs.pc = TaggedUWord(block[0]); + startRegs.x29 = TaggedUWord(block[1]); + startRegs.x30 = TaggedUWord(block[2]); + startRegs.sp = TaggedUWord(block[3]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_mips64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "sd $29, 8(%0) \n" + "sd $30, 16(%0) \n" + : + : "r"(block) + : "memory"); + block[0] = __getpc(); + startRegs.pc = TaggedUWord(block[0]); + startRegs.sp = TaggedUWord(block[1]); + startRegs.fp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#else +# error "Unsupported platform" +#endif + + // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the + // stack. 
+ uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE; + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + StackImage* stackImg = new StackImage(); + stackImg->mLen = nToCopy; + stackImg->mStartAvma = start; + if (nToCopy > 0) { + MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy); + memcpy(&stackImg->mContents[0], (void*)start, nToCopy); + } + + // Unwind it. + const int MAX_TEST_FRAMES = 64; + uintptr_t framePCs[MAX_TEST_FRAMES]; + uintptr_t frameSPs[MAX_TEST_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t framePointerFramesAcquired = 0; + aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed, + &framePointerFramesAcquired, framesAvail, &startRegs, stackImg); + + delete stackImg; + + // if (0) { + // // Show what we have. + // fprintf(stderr, "Got %d frames:\n", (int)framesUsed); + // for (size_t i = 0; i < framesUsed; i++) { + // fprintf(stderr, " [%2d] SP %p PC %p\n", + // (int)i, (void*)frameSPs[i], (void*)framePCs[i]); + // } + // fprintf(stderr, "\n"); + //} + + // Check to see if there's a consistent binding between digits in + // the director string ('1' .. '8') and the PC values acquired by + // the unwind. If there isn't, the unwinding has failed somehow. + uintptr_t binding[8]; // binding for '1' .. binding for '8' + memset((void*)binding, 0, sizeof(binding)); + + // The general plan is to work backwards along the director string + // and forwards along the framePCs array. Doing so corresponds to + // working outwards from the innermost frame of the recursive test set. + const char* cursor = dstring; + + // Find the end. This leaves |cursor| two bytes past the first + // character we want to look at -- see comment below. + while (*cursor) cursor++; + + // Counts the number of consistent frames. 
+ size_t nConsistent = 0; + + // Iterate back to the start of the director string. The starting + // points are a bit complex. We can't use framePCs[0] because that + // contains the PC in this frame (above). We can't use framePCs[1] + // because that will contain the PC at return point in the recursive + // test group (TestFn[1-8]) for their call "out" to this function, + // GetAndCheckStackTrace. Although LUL will compute a correct + // return address, that will not be the same return address as for a + // recursive call out of the the function to another function in the + // group. Hence we can only start consistency checking at + // framePCs[2]. + // + // To be consistent, then, we must ignore the last element in the + // director string as that corresponds to framePCs[1]. Hence the + // start points are: framePCs[2] and the director string 2 bytes + // before the terminating zero. + // + // Also as a result of this, the number of consistent frames counted + // will always be one less than the length of the director string + // (not including its terminating zero). + size_t frameIx; + for (cursor = cursor - 2, frameIx = 2; + cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) { + char c = *cursor; + uintptr_t pc = framePCs[frameIx]; + // If this doesn't hold, the director string is ill-formed. + MOZ_ASSERT(c >= '1' && c <= '8'); + int n = ((int)c) - ((int)'1'); + if (binding[n] == 0) { + // There's no binding for |c| yet, so install |pc| and carry on. + binding[n] = pc; + nConsistent++; + continue; + } + // There's a pre-existing binding for |c|. Check it's consistent. + if (binding[n] != pc) { + // Not consistent. Give up now. + break; + } + // Consistent. Keep going. + nConsistent++; + } + + // So, did we succeed? + bool passed = nConsistent + 1 == strlen(dstring); + + // Show the results. 
+ char buf[200]; + SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n", + (int)nConsistent, (int)strlen(dstring), + passed ? "PASS" : "FAIL"); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + + return !passed; +} + +// Macro magic to create a set of 8 mutually recursive functions with +// varying frame sizes. These will recurse amongst themselves as +// specified by |strP|, the directory string, and call +// GetAndCheckStackTrace when the string becomes empty, passing it the +// original value of the string. This checks the result, printing +// results on |aLUL|'s logging sink, and also returns a boolean +// indicating whether or not the results are acceptable (correct). + +#define DECL_TEST_FN(NAME) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP); + +#define GEN_TEST_FN(NAME, FRAMESIZE) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \ + /* Create a frame of size (at least) FRAMESIZE, so that the */ \ + /* 8 functions created by this macro offer some variation in frame */ \ + /* sizes. This isn't as simple as it might seem, since a clever */ \ + /* optimizing compiler (eg, clang-5) detects that the array is unused */ \ + /* and removes it. We try to defeat this by passing it to a function */ \ + /* in a different compilation unit, and hoping that clang does not */ \ + /* notice that the call is a no-op. */ \ + char space[FRAMESIZE]; \ + Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \ + \ + if (*strP == '\0') { \ + /* We've come to the end of the director string. */ \ + /* Take a stack snapshot. */ \ + /* We purposefully use a negation to avoid tail-call optimization */ \ + return !GetAndCheckStackTrace(aLUL, strPorig); \ + } else { \ + /* Recurse onwards. This is a bit subtle. 
The obvious */ \ + /* thing to do here is call onwards directly, from within the */ \ + /* arms of the case statement. That gives a problem in that */ \ + /* there will be multiple return points inside each function when */ \ + /* unwinding, so it will be difficult to check for consistency */ \ + /* against the director string. Instead, we make an indirect */ \ + /* call, so as to guarantee that there is only one call site */ \ + /* within each function. This does assume that the compiler */ \ + /* won't transform it back to the simple direct-call form. */ \ + /* To discourage it from doing so, the call is bracketed with */ \ + /* __asm__ __volatile__ sections so as to make it not-movable. */ \ + bool (*nextFn)(LUL*, const char*, const char*) = NULL; \ + switch (*strP) { \ + case '1': \ + nextFn = TestFn1; \ + break; \ + case '2': \ + nextFn = TestFn2; \ + break; \ + case '3': \ + nextFn = TestFn3; \ + break; \ + case '4': \ + nextFn = TestFn4; \ + break; \ + case '5': \ + nextFn = TestFn5; \ + break; \ + case '6': \ + nextFn = TestFn6; \ + break; \ + case '7': \ + nextFn = TestFn7; \ + break; \ + case '8': \ + nextFn = TestFn8; \ + break; \ + default: \ + nextFn = TestFn8; \ + break; \ + } \ + /* "use" |space| immediately after the recursive call, */ \ + /* so as to dissuade clang from deallocating the space while */ \ + /* the call is active, or otherwise messing with the stack frame. */ \ + __asm__ __volatile__("" ::: "cc", "memory"); \ + bool passed = nextFn(aLUL, strPorig, strP + 1); \ + Unused << write(1, space, 0); \ + __asm__ __volatile__("" ::: "cc", "memory"); \ + return passed; \ + } \ + } + +// The test functions are mutually recursive, so it is necessary to +// declare them before defining them. 
+DECL_TEST_FN(TestFn1) +DECL_TEST_FN(TestFn2) +DECL_TEST_FN(TestFn3) +DECL_TEST_FN(TestFn4) +DECL_TEST_FN(TestFn5) +DECL_TEST_FN(TestFn6) +DECL_TEST_FN(TestFn7) +DECL_TEST_FN(TestFn8) + +GEN_TEST_FN(TestFn1, 123) +GEN_TEST_FN(TestFn2, 456) +GEN_TEST_FN(TestFn3, 789) +GEN_TEST_FN(TestFn4, 23) +GEN_TEST_FN(TestFn5, 47) +GEN_TEST_FN(TestFn6, 117) +GEN_TEST_FN(TestFn7, 1) +GEN_TEST_FN(TestFn8, 99) + +// This starts the test sequence going. Call here to generate a +// sequence of calls as directed by the string |dstring|. The call +// sequence will, from its innermost frame, finish by calling +// GetAndCheckStackTrace() and passing it |dstring|. +// GetAndCheckStackTrace() will unwind the stack, check consistency +// of those results against |dstring|, and print a pass/fail message +// to aLUL's logging sink. It also updates the counters in *aNTests +// and aNTestsPassed. +__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests, + /*OUT*/ int* aNTestsPassed, LUL* aLUL, + const char* dstring) { + // Ensure that the stack has at least this much space on it. This + // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes + // and hand it to LUL. Safe in the sense that no segfault can + // happen because the stack is at least this big. This is all + // somewhat dubious in the sense that a sufficiently clever compiler + // (clang, for one) can figure out that space[] is unused and delete + // it from the frame. Hence the somewhat elaborate hoop jumping to + // fill it up before the call and to at least appear to use the + // value afterwards. + int i; + volatile char space[LUL_UNIT_TEST_STACK_SIZE]; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + space[i] = (char)(i & 0x7F); + } + + // Really run the test. + bool passed = TestFn1(aLUL, dstring, dstring); + + // Appear to use space[], by visiting the value to compute some kind + // of checksum, and then (apparently) using the checksum. 
+ int sum = 0; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + // If this doesn't fool LLVM, I don't know what will. + sum += space[i] - 3 * i; + } + __asm__ __volatile__("" : : "r"(sum)); + + // Update the counters. + (*aNTests)++; + if (passed) { + (*aNTestsPassed)++; + } +} + +void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed, + LUL* aLUL) { + aLUL->mLog(":\n"); + aLUL->mLog("LULUnitTest: BEGIN\n"); + *aNTests = *aNTestsPassed = 0; + TestUnw(aNTests, aNTestsPassed, aLUL, "11111111"); + TestUnw(aNTests, aNTestsPassed, aLUL, "11222211"); + TestUnw(aNTests, aNTestsPassed, aLUL, "111222333"); + TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212"); + TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258"); + TestUnw(aNTests, aNTestsPassed, aLUL, + "123456781122334455667788777777777777777777777"); + aLUL->mLog("LULUnitTest: END\n"); + aLUL->mLog(":\n"); +} + +} // namespace lul diff --git a/tools/profiler/lul/LulMain.h b/tools/profiler/lul/LulMain.h new file mode 100644 index 0000000000..d386bd5c4f --- /dev/null +++ b/tools/profiler/lul/LulMain.h @@ -0,0 +1,378 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulMain_h +#define LulMain_h + +#include "PlatformMacros.h" +#include "mozilla/Atomics.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/ProfilerUtils.h" + +// LUL: A Lightweight Unwind Library. +// This file provides the end-user (external) interface for LUL. + +// Some comments about naming in the implementation. These are safe +// to ignore if you are merely using LUL, but are important if you +// hack on its internals. 
+// +// Debuginfo readers in general have tended to use the word "address" +// to mean several different things. This sometimes makes them +// difficult to understand and maintain. LUL tries hard to avoid +// using the word "address" and instead uses the following more +// precise terms: +// +// * SVMA ("Stated Virtual Memory Address"): this is an address of a +// symbol (etc) as it is stated in the symbol table, or other +// metadata, of an object. Such values are typically small and +// start from zero or thereabouts, unless the object has been +// prelinked. +// +// * AVMA ("Actual Virtual Memory Address"): this is the address of a +// symbol (etc) in a running process, that is, once the associated +// object has been mapped into a process. Such values are typically +// much larger than SVMAs, since objects can get mapped arbitrarily +// far along the address space. +// +// * "Bias": the difference between AVMA and SVMA for a given symbol +// (specifically, AVMA - SVMA). The bias is always an integral +// number of pages. Once we know the bias for a given object's +// text section (for example), we can compute the AVMAs of all of +// its text symbols by adding the bias to their SVMAs. +// +// * "Image address": typically, to read debuginfo from an object we +// will temporarily mmap in the file so as to read symbol tables +// etc. Addresses in this temporary mapping are called "Image +// addresses". Note that the temporary mapping is entirely +// unrelated to the mappings of the file that the dynamic linker +// must perform merely in order to get the program to run. Hence +// image addresses are unrelated to either SVMAs or AVMAs. + +namespace lul { + +// A machine word plus validity tag. +class TaggedUWord { + public: + // RUNS IN NO-MALLOC CONTEXT + // Construct a valid one. + explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {} + + // RUNS IN NO-MALLOC CONTEXT + // Construct an invalid one. 
+ TaggedUWord() : mValue(0), mValid(false) {} + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator+(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() + rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator-(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator&(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator|(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord CmpGEs(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + intptr_t s1 = (intptr_t)Value(); + intptr_t s2 = (intptr_t)rhs.Value(); + return TaggedUWord(s1 >= s2 ? 1 : 0); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator<<(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + uintptr_t shift = rhs.Value(); + if (shift < 8 * sizeof(uintptr_t)) return TaggedUWord(Value() << shift); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + // Is equal? Note: non-validity on either side gives non-equality. + bool operator==(TaggedUWord other) const { + return (mValid && other.Valid()) ? (mValue == other.Value()) : false; + } + + // RUNS IN NO-MALLOC CONTEXT + // Is it word-aligned? + bool IsAligned() const { + return mValid && (mValue & (sizeof(uintptr_t) - 1)) == 0; + } + + // RUNS IN NO-MALLOC CONTEXT + uintptr_t Value() const { return mValue; } + + // RUNS IN NO-MALLOC CONTEXT + bool Valid() const { return mValid; } + + private: + uintptr_t mValue; + bool mValid; +}; + +// The registers, with validity tags, that will be unwound. 
+ +struct UnwindRegs { +#if defined(GP_ARCH_arm) + TaggedUWord r7; + TaggedUWord r11; + TaggedUWord r12; + TaggedUWord r13; + TaggedUWord r14; + TaggedUWord r15; +#elif defined(GP_ARCH_arm64) + TaggedUWord x29; + TaggedUWord x30; + TaggedUWord sp; + TaggedUWord pc; +#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + TaggedUWord xbp; + TaggedUWord xsp; + TaggedUWord xip; +#elif defined(GP_ARCH_mips64) + TaggedUWord sp; + TaggedUWord fp; + TaggedUWord pc; +#else +# error "Unknown plat" +#endif +}; + +// The maximum number of bytes in a stack snapshot. This value can be increased +// if necessary, but testing showed that 160k is enough to obtain good +// backtraces on x86_64 Linux. Most backtraces fit comfortably into 4-8k of +// stack space, but we do have some very deep stacks occasionally. Please see +// the comments in DoNativeBacktrace as to why it's OK to have this value be so +// large. +static const size_t N_STACK_BYTES = 160 * 1024; + +// The stack chunk image that will be unwound. +struct StackImage { + // [start_avma, +len) specify the address range in the buffer. + // Obviously we require 0 <= len <= N_STACK_BYTES. + uintptr_t mStartAvma; + size_t mLen; + uint8_t mContents[N_STACK_BYTES]; +}; + +// Statistics collection for the unwinder. +template <typename T> +class LULStats { + public: + LULStats() : mContext(0), mCFI(0), mFP(0) {} + + template <typename S> + explicit LULStats(const LULStats<S>& aOther) + : mContext(aOther.mContext), mCFI(aOther.mCFI), mFP(aOther.mFP) {} + + template <typename S> + LULStats<T>& operator=(const LULStats<S>& aOther) { + mContext = aOther.mContext; + mCFI = aOther.mCFI; + mFP = aOther.mFP; + return *this; + } + + template <typename S> + uint32_t operator-(const LULStats<S>& aOther) { + return (mContext - aOther.mContext) + (mCFI - aOther.mCFI) + + (mFP - aOther.mFP); + } + + T mContext; // Number of context frames + T mCFI; // Number of CFI/EXIDX frames + T mFP; // Number of frame-pointer recovered frames +}; + +// The core unwinder library class. 
Just one of these is needed, and +// it can be shared by multiple unwinder threads. +// +// The library operates in one of two modes. +// +// * Admin mode. The library is in this state after creation. In Admin +// mode, no unwinding may be performed. It is however allowable to +// perform administrative tasks -- primarily, loading of unwind info +// -- in this mode. In particular, it is safe for the library to +// perform dynamic memory allocation in this mode. Safe in the +// sense that there is no risk of deadlock against unwinding threads +// that might -- because of where they have been sampled -- hold the +// system's malloc lock. +// +// * Unwind mode. In this mode, calls to ::Unwind may be made, but +// nothing else. ::Unwind guarantees not to make any dynamic memory +// requests, so as to guarantee that the calling thread won't +// deadlock in the case where it already holds the system's malloc lock. +// +// The library is created in Admin mode. After debuginfo is loaded, +// the caller must switch it into Unwind mode by calling +// ::EnableUnwinding. There is no way to switch it back to Admin mode +// after that. To safely switch back to Admin mode would require the +// caller (or other external agent) to guarantee that there are no +// pending ::Unwind calls. + +class PriMap; +class SegArray; +class UniqueStringUniverse; + +class LUL { + public: + // Create; supply a logging sink. Sets the object in Admin mode. + explicit LUL(void (*aLog)(const char*)); + + // Destroy. Caller is responsible for ensuring that no other + // threads are in Unwind calls. All resources are freed and all + // registered unwinder threads are deregistered. Can be called + // either in Admin or Unwind mode. + ~LUL(); + + // Notify the library that unwinding is now allowed and so + // admin-mode calls are no longer allowed. The object is initially + // created in admin mode. The only possible transition is + // admin->unwinding, therefore. 
+ void EnableUnwinding(); + + // Notify of a new r-x mapping, and load the associated unwind info. + // The filename is strdup'd and used for debug printing. If + // aMappedImage is NULL, this function will mmap/munmap the file + // itself, so as to be able to read the unwind info. If + // aMappedImage is non-NULL then it is assumed to point to a + // called-supplied and caller-managed mapped image of the file. + // May only be called in Admin mode. + void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, + const void* aMappedImage); + + // In rare cases we know an executable area exists but don't know + // what the associated file is. This call notifies LUL of such + // areas. This is important for correct functioning of stack + // scanning and of the x86-{linux,android} special-case + // __kernel_syscall function handling. + // This must be called only after the code area in + // question really has been mapped. + // May only be called in Admin mode. + void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize); + + // Notify that a mapped area has been unmapped; discard any + // associated unwind info. Acquires mRWlock for writing. Note that + // to avoid segfaulting the stack-scan unwinder, which inspects code + // areas, this must be called before the code area in question is + // really unmapped. Note that, unlike NotifyAfterMap(), this + // function takes the start and end addresses of the range to be + // unmapped, rather than a start and a length parameter. This is so + // as to make it possible to notify an unmap for the entire address + // space using a single call. + // May only be called in Admin mode. + void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax); + + // Apply NotifyBeforeUnmap to the entire address space. This causes + // LUL to discard all unwind and executable-area information for the + // entire address space. + // May only be called in Admin mode. 
+ void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); } + + // Returns the number of mappings currently registered. + // May only be called in Admin mode. + size_t CountMappings(); + + // Unwind |aStackImg| starting with the context in |aStartRegs|. + // Write the number of frames recovered in *aFramesUsed. Put + // the PC values in aFramePCs[0 .. *aFramesUsed-1] and + // the SP values in aFrameSPs[0 .. *aFramesUsed-1]. + // |aFramesAvail| is the size of the two output arrays and hence the + // largest possible value of *aFramesUsed. PC values are always + // valid, and the unwind will stop when the PC becomes invalid, but + // the SP values might be invalid, in which case the value zero will + // be written in the relevant frameSPs[] slot. + // + // This function assumes that the SP values increase as it unwinds + // away from the innermost frame -- that is, that the stack grows + // down. It monitors SP values as it unwinds to check they + // decrease, so as to avoid looping on corrupted stacks. + // + // May only be called in Unwind mode. Multiple threads may unwind + // at once. LUL user is responsible for ensuring that no thread makes + // any Admin calls whilst in Unwind mode. + // MOZ_CRASHes if the calling thread is not registered for unwinding. + // + // The calling thread must previously have been registered via a call to + // RegisterSampledThread. + void Unwind(/*OUT*/ uintptr_t* aFramePCs, + /*OUT*/ uintptr_t* aFrameSPs, + /*OUT*/ size_t* aFramesUsed, + /*OUT*/ size_t* aFramePointerFramesAcquired, size_t aFramesAvail, + UnwindRegs* aStartRegs, StackImage* aStackImg); + + // The logging sink. Call to send debug strings to the caller- + // specified destination. Can only be called by the Admin thread. + void (*mLog)(const char*); + + // Statistics relating to unwinding. These have to be atomic since + // unwinding can occur on different threads simultaneously. + LULStats> mStats; + + // Possibly show the statistics. 
This may not be called from any + // registered sampling thread, since it involves I/O. + void MaybeShowStats(); + + size_t SizeOfIncludingThis(mozilla::MallocSizeOf) const; + + private: + // The statistics counters at the point where they were last printed. + LULStats mStatsPrevious; + + // Are we in admin mode? Initially |true| but changes to |false| + // once unwinding begins. + bool mAdminMode; + + // The thread ID associated with admin mode. This is the only thread + // that is allowed do perform non-Unwind calls on this object. Conversely, + // no registered Unwinding thread may be the admin thread. This is so + // as to clearly partition the one thread that may do dynamic memory + // allocation from the threads that are being sampled, since the latter + // absolutely may not do dynamic memory allocation. + ProfilerThreadId mAdminThreadId; + + // The top level mapping from code address ranges to postprocessed + // unwind info. Basically a sorted array of (addr, len, info) + // records. This field is updated by NotifyAfterMap and NotifyBeforeUnmap. + PriMap* mPriMap; + + // An auxiliary structure that records which address ranges are + // mapped r-x, for the benefit of the stack scanner. + SegArray* mSegArray; + + // A UniqueStringUniverse that holds all the strdup'd strings created + // whilst reading unwind information. This is included so as to make + // it possible to free them in ~LUL. + UniqueStringUniverse* mUSU; +}; + +// Run unit tests on an initialised, loaded-up LUL instance, and print +// summary results on |aLUL|'s logging sink. Also return the number +// of tests run in *aNTests and the number that passed in +// *aNTestsPassed. 
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed, + LUL* aLUL); + +} // namespace lul + +#endif // LulMain_h diff --git a/tools/profiler/lul/LulMainInt.h b/tools/profiler/lul/LulMainInt.h new file mode 100644 index 0000000000..001a4aecfb --- /dev/null +++ b/tools/profiler/lul/LulMainInt.h @@ -0,0 +1,631 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulMainInt_h +#define LulMainInt_h + +#include "PlatformMacros.h" +#include "LulMain.h" // for TaggedUWord + +#include +#include + +#include "mozilla/Assertions.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/Sprintf.h" + +// This file provides an internal interface inside LUL. If you are an +// end-user of LUL, do not include it in your code. The end-user +// interface is in LulMain.h. + +namespace lul { + +using std::vector; + +//////////////////////////////////////////////////////////////// +// DW_REG_ constants // +//////////////////////////////////////////////////////////////// + +// These are the Dwarf CFI register numbers, as (presumably) defined +// in the ELF ABI supplements for each architecture. + +enum DW_REG_NUMBER { + // No real register has this number. It's convenient to be able to + // treat the CFA (Canonical Frame Address) as "just another + // register", though. 
+ DW_REG_CFA = -1, +#if defined(GP_ARCH_arm) + // ARM registers + DW_REG_ARM_R7 = 7, + DW_REG_ARM_R11 = 11, + DW_REG_ARM_R12 = 12, + DW_REG_ARM_R13 = 13, + DW_REG_ARM_R14 = 14, + DW_REG_ARM_R15 = 15, +#elif defined(GP_ARCH_arm64) + // aarch64 registers + DW_REG_AARCH64_X29 = 29, + DW_REG_AARCH64_X30 = 30, + DW_REG_AARCH64_SP = 31, +#elif defined(GP_ARCH_amd64) + // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are + // combined, a merged set of register constants is needed. + DW_REG_INTEL_XBP = 6, + DW_REG_INTEL_XSP = 7, + DW_REG_INTEL_XIP = 16, +#elif defined(GP_ARCH_x86) + DW_REG_INTEL_XBP = 5, + DW_REG_INTEL_XSP = 4, + DW_REG_INTEL_XIP = 8, +#elif defined(GP_ARCH_mips64) + DW_REG_MIPS_SP = 29, + DW_REG_MIPS_FP = 30, + DW_REG_MIPS_PC = 34, +#else +# error "Unknown arch" +#endif +}; + +//////////////////////////////////////////////////////////////// +// PfxExpr // +//////////////////////////////////////////////////////////////// + +enum PfxExprOp { + // meaning of mOperand effect on stack + PX_Start, // bool start-with-CFA? start, with CFA on stack, or not + PX_End, // none stop; result is at top of stack + PX_SImm32, // int32 push signed int32 + PX_DwReg, // DW_REG_NUMBER push value of the specified reg + PX_Deref, // none pop X ; push *X + PX_Add, // none pop X ; pop Y ; push Y + X + PX_Sub, // none pop X ; pop Y ; push Y - X + PX_And, // none pop X ; pop Y ; push Y & X + PX_Or, // none pop X ; pop Y ; push Y | X + PX_CmpGES, // none pop X ; pop Y ; push (Y >=s X) ? 
1 : 0 + PX_Shl // none pop X ; pop Y ; push Y << X +}; + +struct PfxInstr { + PfxInstr(PfxExprOp opcode, int32_t operand) + : mOpcode(opcode), mOperand(operand) {} + explicit PfxInstr(PfxExprOp opcode) : mOpcode(opcode), mOperand(0) {} + bool operator==(const PfxInstr& other) const { + return mOpcode == other.mOpcode && mOperand == other.mOperand; + } + PfxExprOp mOpcode; + int32_t mOperand; +}; + +static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly"); + +// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start]. +// In the case of any mishap (stack over/underflow, running off the end of +// the instruction vector, obviously malformed sequences), +// return an invalid TaggedUWord. +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector& aPfxInstrs); + +//////////////////////////////////////////////////////////////// +// LExpr // +//////////////////////////////////////////////////////////////// + +// An expression -- very primitive. Denotes either "register + +// offset", a dereferenced version of the same, or a reference to a +// prefix expression stored elsewhere. So as to allow convenient +// handling of Dwarf-derived unwind info, the register may also denote +// the CFA. A large number of these need to be stored, so we ensure +// it fits into 8 bytes. See comment below on RuleSet to see how +// expressions fit into the bigger picture. + +enum LExprHow { + UNKNOWN = 0, // This LExpr denotes no value. + NODEREF, // Value is (mReg + mOffset). + DEREF, // Value is *(mReg + mOffset). 
+ PFXEXPR // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset]) +}; + +inline static const char* NameOf_LExprHow(LExprHow how) { + switch (how) { + case UNKNOWN: + return "UNKNOWN"; + case NODEREF: + return "NODEREF"; + case DEREF: + return "DEREF"; + case PFXEXPR: + return "PFXEXPR"; + default: + return "LExpr-??"; + } +} + +struct LExpr { + // Denotes an expression with no value. + LExpr() : mHow(UNKNOWN), mReg(0), mOffset(0) {} + + // Denotes any expressible expression. + LExpr(LExprHow how, int16_t reg, int32_t offset) + : mHow(how), mReg(reg), mOffset(offset) { + switch (how) { + case UNKNOWN: + MOZ_ASSERT(reg == 0 && offset == 0); + break; + case NODEREF: + break; + case DEREF: + break; + case PFXEXPR: + MOZ_ASSERT(reg == 0 && offset >= 0); + break; + default: + MOZ_RELEASE_ASSERT(0, "LExpr::LExpr: invalid how"); + } + } + + // Hash it, carefully looking only at defined parts. + mozilla::HashNumber hash() const { + mozilla::HashNumber h = mHow; + switch (mHow) { + case UNKNOWN: + break; + case NODEREF: + case DEREF: + h = mozilla::AddToHash(h, mReg); + h = mozilla::AddToHash(h, mOffset); + break; + case PFXEXPR: + h = mozilla::AddToHash(h, mOffset); + break; + default: + MOZ_RELEASE_ASSERT(0, "LExpr::hash: invalid how"); + } + return h; + } + + // And structural equality. + bool equals(const LExpr& other) const { + if (mHow != other.mHow) { + return false; + } + switch (mHow) { + case UNKNOWN: + return true; + case NODEREF: + case DEREF: + return mReg == other.mReg && mOffset == other.mOffset; + case PFXEXPR: + return mOffset == other.mOffset; + default: + MOZ_RELEASE_ASSERT(0, "LExpr::equals: invalid how"); + } + } + + // Change the offset for an expression that references memory. + LExpr add_delta(long delta) { + MOZ_ASSERT(mHow == NODEREF); + // If this is a non-debug build and the above assertion would have + // failed, at least return LExpr() so that the machinery that uses + // the resulting expression fails in a repeatable way. 
+ return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset + delta) + : LExpr(); // Gone bad + } + + // Dereference an expression that denotes a memory address. + LExpr deref() { + MOZ_ASSERT(mHow == NODEREF); + // Same rationale as for add_delta(). + return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset) + : LExpr(); // Gone bad + } + + // Print a rule for recovery of |aNewReg| whose recovered value + // is this LExpr. + std::string ShowRule(const char* aNewReg) const; + + // Evaluate this expression, producing a TaggedUWord. |aOldRegs| + // holds register values that may be referred to by the expression. + // |aCFA| holds the CFA value, if any, that applies. |aStackImg| + // contains a chuck of stack that will be consulted if the expression + // references memory. |aPfxInstrs| holds the vector of PfxInstrs + // that will be consulted if this is a PFXEXPR. + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA, + const StackImage* aStackImg, + const vector* aPfxInstrs) const; + + // Representation of expressions. If |mReg| is DW_REG_CFA (-1) then + // it denotes the CFA. All other allowed values for |mReg| are + // nonnegative and are DW_REG_ values. + LExprHow mHow : 8; + int16_t mReg; // A DW_REG_ value + int32_t mOffset; // 32-bit signed offset should be more than enough. +}; + +static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly"); + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +// This is platform-dependent. It describes how to recover the CFA and then +// how to recover the registers for the previous frame. Such "recipes" are +// specific to particular ranges of machine code, but the associated range +// is not stored in RuleSet, because in general each RuleSet may be used +// for many such range fragments ("extents"). See the comments below for +// Extent and SecMap. 
+// +// The set of LExprs contained in a given RuleSet describe a DAG which +// says how to compute the caller's registers ("new registers") from +// the callee's registers ("old registers"). The DAG can contain a +// single internal node, which is the value of the CFA for the callee. +// It would be possible to construct a DAG that omits the CFA, but +// including it makes the summarisers simpler, and the Dwarf CFI spec +// has the CFA as a central concept. +// +// For this to make sense, |mCfaExpr| can't have +// |mReg| == DW_REG_CFA since we have no previous value for the CFA. +// All of the other |Expr| fields can -- and usually do -- specify +// |mReg| == DW_REG_CFA. +// +// With that in place, the unwind algorithm proceeds as follows. +// +// (0) Initially: we have values for the old registers, and a memory +// image. +// +// (1) Compute the CFA by evaluating |mCfaExpr|. Add the computed +// value to the set of "old registers". +// +// (2) Compute values for the registers by evaluating all of the other +// |Expr| fields in the RuleSet. These can depend on both the old +// register values and the just-computed CFA. +// +// If we are unwinding without computing a CFA, perhaps because the +// RuleSets are derived from EXIDX instead of Dwarf, then +// |mCfaExpr.mHow| will be LExpr::UNKNOWN, so the computed value will +// be invalid -- that is, TaggedUWord() -- and so any attempt to use +// that will result in the same value. But that's OK because the +// RuleSet would make no sense if depended on the CFA but specified no +// way to compute it. +// +// A RuleSet is not allowed to cover zero address range. Having zero +// length would break binary searching in SecMaps and PriMaps. + +class RuleSet { + public: + RuleSet(); + void Print(uintptr_t avma, uintptr_t len, void (*aLog)(const char*)) const; + + // Find the LExpr* for a given DW_REG_ value in this class. + LExpr* ExprForRegno(DW_REG_NUMBER aRegno); + + // How to compute the CFA. 
+ LExpr mCfaExpr; + // How to compute caller register values. These may reference the + // value defined by |mCfaExpr|. +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + LExpr mXipExpr; // return address + LExpr mXspExpr; + LExpr mXbpExpr; +#elif defined(GP_ARCH_arm) + LExpr mR15expr; // return address + LExpr mR14expr; + LExpr mR13expr; + LExpr mR12expr; + LExpr mR11expr; + LExpr mR7expr; +#elif defined(GP_ARCH_arm64) + LExpr mX29expr; // frame pointer register + LExpr mX30expr; // link register + LExpr mSPexpr; +#elif defined(GP_ARCH_mips64) + LExpr mPCexpr; + LExpr mFPexpr; + LExpr mSPexpr; +#else +# error "Unknown arch" +#endif + + // Machinery in support of hashing. + typedef RuleSet Lookup; + + static mozilla::HashNumber hash(RuleSet rs) { + mozilla::HashNumber h = rs.mCfaExpr.hash(); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + h = mozilla::AddToHash(h, rs.mXipExpr.hash()); + h = mozilla::AddToHash(h, rs.mXspExpr.hash()); + h = mozilla::AddToHash(h, rs.mXbpExpr.hash()); +#elif defined(GP_ARCH_arm) + h = mozilla::AddToHash(h, rs.mR15expr.hash()); + h = mozilla::AddToHash(h, rs.mR14expr.hash()); + h = mozilla::AddToHash(h, rs.mR13expr.hash()); + h = mozilla::AddToHash(h, rs.mR12expr.hash()); + h = mozilla::AddToHash(h, rs.mR11expr.hash()); + h = mozilla::AddToHash(h, rs.mR7expr.hash()); +#elif defined(GP_ARCH_arm64) + h = mozilla::AddToHash(h, rs.mX29expr.hash()); + h = mozilla::AddToHash(h, rs.mX30expr.hash()); + h = mozilla::AddToHash(h, rs.mSPexpr.hash()); +#elif defined(GP_ARCH_mips64) + h = mozilla::AddToHash(h, rs.mPCexpr.hash()); + h = mozilla::AddToHash(h, rs.mFPexpr.hash()); + h = mozilla::AddToHash(h, rs.mSPexpr.hash()); +#else +# error "Unknown arch" +#endif + return h; + } + + static bool match(const RuleSet& rs1, const RuleSet& rs2) { + return rs1.mCfaExpr.equals(rs2.mCfaExpr) && +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + rs1.mXipExpr.equals(rs2.mXipExpr) && + rs1.mXspExpr.equals(rs2.mXspExpr) && + 
rs1.mXbpExpr.equals(rs2.mXbpExpr); +#elif defined(GP_ARCH_arm) + rs1.mR15expr.equals(rs2.mR15expr) && + rs1.mR14expr.equals(rs2.mR14expr) && + rs1.mR13expr.equals(rs2.mR13expr) && + rs1.mR12expr.equals(rs2.mR12expr) && + rs1.mR11expr.equals(rs2.mR11expr) && rs1.mR7expr.equals(rs2.mR7expr); +#elif defined(GP_ARCH_arm64) + rs1.mX29expr.equals(rs2.mX29expr) && + rs1.mX30expr.equals(rs2.mX30expr) && rs1.mSPexpr.equals(rs2.mSPexpr); +#elif defined(GP_ARCH_mips64) + rs1.mPCexpr.equals(rs2.mPCexpr) && rs1.mFPexpr.equals(rs2.mFPexpr) && + rs1.mSPexpr.equals(rs2.mSPexpr); +#else +# error "Unknown arch" +#endif + } +}; + +// Returns |true| for Dwarf register numbers which are members +// of the set of registers that LUL unwinds on this target. +static inline bool registerIsTracked(DW_REG_NUMBER reg) { + switch (reg) { +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + case DW_REG_INTEL_XSP: + case DW_REG_INTEL_XIP: + return true; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + case DW_REG_ARM_R14: + case DW_REG_ARM_R15: + return true; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_X30: + case DW_REG_AARCH64_SP: + return true; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_FP: + case DW_REG_MIPS_SP: + case DW_REG_MIPS_PC: + return true; +#else +# error "Unknown arch" +#endif + default: + return false; + } +} + +//////////////////////////////////////////////////////////////// +// Extent // +//////////////////////////////////////////////////////////////// + +struct Extent { + // Three fields, which together take 8 bytes. + uint32_t mOffset; + uint16_t mLen; + uint16_t mDictIx; + + // What this means is: suppose we are looking for the unwind rules for some + // code address (AVMA) `avma`. 
If we can find some SecMap `secmap` such + // that `avma` falls in the range + // + // `[secmap.mMapMinAVMA, secmap.mMapMaxAVMA]` + // + // then the RuleSet to use is `secmap.mDictionary[dictIx]` iff we can find + // an `extent` in `secmap.mExtents` such that `avma` falls into the range + // + // `[secmap.mMapMinAVMA + extent.offset(), + // secmap.mMapMinAVMA + extent.offset() + extent.len())`. + // + // Packing Extent into the minimum space is important, since there will be + // huge numbers of Extents -- around 3 million for libxul.so as of Sept + // 2020. Here, we aim for an 8-byte size, with the field sizes chosen + // carefully, as follows: + // + // `offset` denotes a byte offset inside the text section for some shared + // object. libxul.so is by far the largest. As of Sept 2020 it has a text + // size of up to around 120MB, that is, close to 2^27 bytes. Hence a 32-bit + // `offset` field gives a safety margin of around a factor of 32 + // (== 2 ^(32 - 27)). + // + // `dictIx` indicates a unique `RuleSet` for some code address range. + // Experimentation on x86_64-linux indicates that only around 300 different + // `RuleSet`s exist, for libxul.so. A 16-bit bit field allows up to 65536 + // to be recorded, hence leaving us a generous safety margin. + // + // `len` indicates the length of the associated address range. + // + // Note the representation becomes unusable if either `offset` overflows 32 + // bits or `dictIx` overflows 16 bits. On the other hand, it does not + // matter (although is undesirable) if `len` overflows 16 bits, because in + // that case we can add multiple size-65535 entries to `secmap.mExtents` to + // cover the entire range. Hence the field sizes are biased so as to give a + // good safety margin for `offset` and `dictIx` at the cost of stealing bits + // from `len`. Almost all `len` values we will ever see in practice are + // 65535 or less, so stealing those bits does not matter much. 
+ // + // If further compression is required, it would be feasible to implement + // Extent using 29 bits for the offset, 8 bits for the length and 11 bits + // for the dictionary index, giving a total of 6 bytes, provided that the + // data is packed into 3 uint16_t's. That would be a bit slower, though, + // due to the bit packing, and it would be more fragile, in the sense that + // it would fail for any object with more than 512MB of text segment, or + // with more than 2048 different `RuleSet`s. For the current (Sept 2020) + // libxul.so situation, though, it would work fine. + + Extent(uint32_t offset, uint32_t len, uint32_t dictIx) { + MOZ_RELEASE_ASSERT(len < (1 << 16)); + MOZ_RELEASE_ASSERT(dictIx < (1 << 16)); + mOffset = offset; + mLen = len; + mDictIx = dictIx; + } + inline uint32_t offset() const { return mOffset; } + inline uint32_t len() const { return mLen; } + inline uint32_t dictIx() const { return mDictIx; } + void setLen(uint32_t len) { + MOZ_RELEASE_ASSERT(len < (1 << 16)); + mLen = len; + } + void Print(void (*aLog)(const char*)) const { + char buf[64]; + SprintfLiteral(buf, "Extent(offs=0x%x, len=%u, dictIx=%u)", this->offset(), + this->len(), this->dictIx()); + aLog(buf); + } +}; + +static_assert(sizeof(Extent) == 8); + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// A SecMap may have zero address range, temporarily, whilst RuleSets +// are being added to it. But adding a zero-range SecMap to a PriMap +// will make it impossible to maintain the total order of the PriMap +// entries, and so that can't be allowed to happen. + +class SecMap { + public: + // In the constructor, `mapStartAVMA` and `mapLen` define the actual + // (in-process) virtual addresses covered by the SecMap. 
All RuleSets + // subsequently added to it by calling `AddRuleSet` must fall into this + // address range, and attempts to add ones outside the range will be + // ignored. This restriction exists because the type Extent (see below) + // indicates an address range for a RuleSet, but for reasons of compactness, + // it does not contain the start address of the range. Instead, it contains + // a 32-bit offset from the base address of the SecMap. This is also the + // reason why the map's size is a `uint32_t` and not a `uintptr_t`. + // + // The effect is to limit this mechanism to shared objects / executables + // whose text section size does not exceed 4GB (2^32 bytes). Given that, as + // of Sept 2020, libxul.so's text section size is around 120MB, this does + // not seem like much of a limitation. + // + // From the supplied `mapStartAVMA` and `mapLen`, fields `mMapMinAVMA` and + // `mMapMaxAVMA` are calculated. It is intended that no two SecMaps owned + // by the same PriMap contain overlapping address ranges, and the PriMap + // logic enforces that. + // + // Some invariants: + // + // mExtents is nonempty + // <=> mMapMinAVMA <= mMapMaxAVMA + // && mMapMinAVMA <= apply_delta(mExtents[0].offset()) + // && apply_delta(mExtents[#rulesets-1].offset() + // + mExtents[#rulesets-1].len() - 1) <= mMapMaxAVMA + // where + // apply_delta(off) = off + mMapMinAVMA + // + // This requires that no RuleSet has zero length. + // + // mExtents is empty + // <=> mMapMinAVMA > mMapMaxAVMA + // + // This doesn't constrain mMapMinAVMA and mMapMaxAVMA uniquely, so let's use + // mMapMinAVMA == 1 and mMapMaxAVMA == 0 to denote this case. + + SecMap(uintptr_t mapStartAVMA, uint32_t mapLen, void (*aLog)(const char*)); + ~SecMap(); + + // Binary search mRuleSets to find one that brackets |ia|, or nullptr + // if none is found. It's not allowable to do this until PrepareRuleSets + // has been called first. + RuleSet* FindRuleSet(uintptr_t ia); + + // Add a RuleSet to the collection. 
The rule is copied in. Calling + // this makes the map non-searchable. + void AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len); + + // Add a PfxInstr to the vector of such instrs, and return the index + // in the vector. Calling this makes the map non-searchable. + uint32_t AddPfxInstr(PfxInstr pfxi); + + // Returns the entire vector of PfxInstrs. + const vector* GetPfxInstrs() { return &mPfxInstrs; } + + // Prepare the map for searching, by sorting it, de-overlapping entries and + // removing any resulting zero-length entries. At the start of this + // routine, all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and + // not have zero length, as a result of the checks in AddRuleSet(). + void PrepareRuleSets(); + + bool IsEmpty(); + + size_t Size() { return mExtents.size() + mDictionary.size(); } + + size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + // The extent of this SecMap as a whole. The extents of all contained + // RuleSets must fall inside this. See comment above for details. + uintptr_t mMapMinAVMA; + uintptr_t mMapMaxAVMA; + + private: + // False whilst adding entries; true once it is safe to call FindRuleSet. + // Transition (false->true) is caused by calling PrepareRuleSets(). + bool mUsable; + + // This is used to find and remove duplicate RuleSets while we are adding + // them to the SecMap. Almost all RuleSets are duplicates, so de-duping + // them is a huge space win. This is non-null while `mUsable` is false, and + // becomes null (is discarded) after the call to PrepareRuleSets, which + // copies all the entries into `mDictionary`. + mozilla::UniquePtr< + mozilla::HashMap> + mUniqifier; + + // This will contain final contents of `mUniqifier`, but ordered + // (implicitly) by the `uint32_t` value fields, for fast access. + vector mDictionary; + + // A vector of Extents, sorted by offset value, nonoverlapping (post + // PrepareRuleSets()). 
+ vector mExtents; + + // A vector of PfxInstrs, which are referred to by the RuleSets. + // These are provided as a representation of Dwarf expressions + // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression), + // are relatively expensive to evaluate, and and are therefore + // expected to be used only occasionally. + // + // The vector holds a bunch of separate PfxInstr programs, each one + // starting with a PX_Start and terminated by a PX_End, all + // concatenated together. When a RuleSet can't recover a value + // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is + // the index in this vector of start of the necessary PfxInstr program. + vector mPfxInstrs; + + // A logging sink, for debugging. + void (*mLog)(const char*); +}; + +} // namespace lul + +#endif // ndef LulMainInt_h diff --git a/tools/profiler/lul/platform-linux-lul.cpp b/tools/profiler/lul/platform-linux-lul.cpp new file mode 100644 index 0000000000..4027905c60 --- /dev/null +++ b/tools/profiler/lul/platform-linux-lul.cpp @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include +#include + +#include "mozilla/ProfilerState.h" +#include "platform.h" +#include "PlatformMacros.h" +#include "LulMain.h" +#include "shared-libraries.h" +#include "AutoObjectMapper.h" + +// Contains miscellaneous helpers that are used to connect the Gecko Profiler +// and LUL. + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. 
+void read_procmaps(lul::LUL* aLUL) { + MOZ_ASSERT(aLUL->CountMappings() == 0); + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + + for (size_t i = 0; i < info.GetSize(); i++) { + const SharedLibrary& lib = info.GetEntry(i); + + std::string nativePath = lib.GetNativeDebugPath(); + + // We can use the standard POSIX-based mapper. + AutoObjectMapperPOSIX mapper(aLUL->mLog); + + // Ask |mapper| to map the object. Then hand its mapped address + // to NotifyAfterMap(). + void* image = nullptr; + size_t size = 0; + bool ok = mapper.Map(&image, &size, nativePath); + if (ok && image && size > 0) { + aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd() - lib.GetStart(), + nativePath.c_str(), image); + } else if (!ok && lib.GetDebugName().IsEmpty()) { + // The object has no name and (as a consequence) the mapper failed to map + // it. This happens on Linux, where GetInfoForSelf() produces such a + // mapping for the VDSO. This is a problem on x86-{linux,android} because + // lack of knowledge about the mapped area inhibits LUL's special + // __kernel_syscall handling. Hence notify |aLUL| at least of the + // mapping, even though it can't read any unwind information for the area. + aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd() - lib.GetStart()); + } + + // |mapper| goes out of scope at this point and so its destructor + // unmaps the object. + } + +#else +# error "Unknown platform" +#endif +} + +// LUL needs a callback for its logging sink. +void logging_sink_for_LUL(const char* str) { + // These are only printed when Verbose logging is enabled (e.g. with + // MOZ_LOG="prof:5"). This is because LUL's logging is much more verbose than + // the rest of the profiler's logging, which occurs at the Info (3) and Debug + // (4) levels. 
+ MOZ_LOG(gProfilerLog, mozilla::LogLevel::Verbose, + ("[%" PRIu64 "] %s", + uint64_t(profiler_current_process_id().ToNumber()), str)); +} diff --git a/tools/profiler/lul/platform-linux-lul.h b/tools/profiler/lul/platform-linux-lul.h new file mode 100644 index 0000000000..7c94299961 --- /dev/null +++ b/tools/profiler/lul/platform-linux-lul.h @@ -0,0 +1,19 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PLATFORM_LINUX_LUL_H +#define MOZ_PLATFORM_LINUX_LUL_H + +#include "platform.h" + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void read_procmaps(lul::LUL* aLUL); + +// LUL needs a callback for its logging sink. +void logging_sink_for_LUL(const char* str); + +#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */ diff --git a/tools/profiler/moz.build b/tools/profiler/moz.build new file mode 100644 index 0000000000..8b185195f8 --- /dev/null +++ b/tools/profiler/moz.build @@ -0,0 +1,227 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +if CONFIG["MOZ_GECKO_PROFILER"]: + DEFINES["MOZ_REPLACE_MALLOC_PREFIX"] = "profiler" + XPIDL_MODULE = "profiler" + XPIDL_SOURCES += [ + "gecko/nsIProfiler.idl", + ] + EXPORTS += [ + "public/GeckoProfilerReporter.h", + "public/ProfilerChild.h", + "public/ProfilerCodeAddressService.h", + "public/shared-libraries.h", + ] + UNIFIED_SOURCES += [ + "core/PageInformation.cpp", + "core/platform.cpp", + "core/ProfileBuffer.cpp", + "core/ProfileBufferEntry.cpp", + "core/ProfiledThreadData.cpp", + "core/ProfilerBacktrace.cpp", + "core/ProfilerCodeAddressService.cpp", + "core/ProfilerMarkers.cpp", + "gecko/ChildProfilerController.cpp", + "gecko/nsProfilerStartParams.cpp", + "gecko/ProfilerChild.cpp", + "gecko/ProfilerIOInterposeObserver.cpp", + ] + if CONFIG["MOZ_REPLACE_MALLOC"] and CONFIG["MOZ_PROFILER_MEMORY"]: + SOURCES += [ + "core/memory_hooks.cpp", # Non-unified because of order of #includes + ] + + XPCOM_MANIFESTS += [ + "gecko/components.conf", + ] + + if CONFIG["OS_TARGET"] == "Darwin": + # This file cannot be built in unified mode because it includes + # "nsLocalFile.h", which pulls in a system header which uses a type + # called TextRange, which conflicts with mozilla::TextRange due to + # a "using namespace mozilla;" declaration from a different file. + SOURCES += [ + "gecko/nsProfiler.cpp", + ] + else: + UNIFIED_SOURCES += [ + "gecko/nsProfiler.cpp", + ] + + if CONFIG["OS_TARGET"] in ("Android", "Linux", "FreeBSD"): + if CONFIG["CPU_ARCH"] in ("arm", "aarch64", "x86", "x86_64", "mips64"): + UNIFIED_SOURCES += [ + "lul/AutoObjectMapper.cpp", + "lul/LulCommon.cpp", + "lul/LulDwarf.cpp", + "lul/LulDwarfSummariser.cpp", + "lul/LulElf.cpp", + "lul/LulMain.cpp", + "lul/platform-linux-lul.cpp", + ] + # These files cannot be built in unified mode because of name clashes with mozglue headers on Android. 
+ SOURCES += [ + "core/shared-libraries-linux.cc", + ] + if not CONFIG["MOZ_CRASHREPORTER"]: + SOURCES += [ + "/toolkit/crashreporter/google-breakpad/src/common/linux/elfutils.cc", + "/toolkit/crashreporter/google-breakpad/src/common/linux/file_id.cc", + "/toolkit/crashreporter/google-breakpad/src/common/linux/linux_libc_support.cc", + "/toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.cc", + ] + if not CONFIG["HAVE_GETCONTEXT"]: + SOURCES += [ + "/toolkit/crashreporter/google-breakpad/src/common/linux/breakpad_getcontext.S" + ] + if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + UNIFIED_SOURCES += [ + "core/PowerCounters-linux.cpp", + ] + if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] != "FreeBSD": + SOURCES += [ + "core/EHABIStackWalk.cpp", + ] + elif CONFIG["OS_TARGET"] == "Darwin": + UNIFIED_SOURCES += [ + "core/shared-libraries-macos.cc", + ] + if CONFIG["CPU_ARCH"] == "aarch64": + UNIFIED_SOURCES += [ + "core/PowerCounters-mac-arm64.cpp", + ] + if CONFIG["CPU_ARCH"] == "x86_64": + UNIFIED_SOURCES += [ + "core/PowerCounters-mac-amd64.cpp", + ] + elif CONFIG["OS_TARGET"] == "WINNT": + if CONFIG["CC_TYPE"] == "clang-cl": + UNIFIED_SOURCES += [ + "core/PowerCounters-win.cpp", + ] + SOURCES += [ + "core/shared-libraries-win32.cc", + ] + + LOCAL_INCLUDES += [ + "/caps", + "/docshell/base", + "/ipc/chromium/src", + "/mozglue/linker", + "/netwerk/base", + "/netwerk/protocol/http", + "/toolkit/components/jsoncpp/include", + "/toolkit/crashreporter/google-breakpad/src", + "/tools/profiler/core/", + "/tools/profiler/gecko/", + "/xpcom/base", + ] + + if CONFIG["OS_TARGET"] == "Android": + DEFINES["ANDROID_NDK_MAJOR_VERSION"] = CONFIG["ANDROID_NDK_MAJOR_VERSION"] + DEFINES["ANDROID_NDK_MINOR_VERSION"] = CONFIG["ANDROID_NDK_MINOR_VERSION"] + LOCAL_INCLUDES += [ + # We need access to Breakpad's getcontext(3) which is suitable for Android + "/toolkit/crashreporter/google-breakpad/src/common/android/include", + ] + + if 
CONFIG["MOZ_VTUNE"]: + DEFINES["MOZ_VTUNE_INSTRUMENTATION"] = True + UNIFIED_SOURCES += [ + "core/VTuneProfiler.cpp", + ] + + XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.ini"] + MOCHITEST_CHROME_MANIFESTS += ["tests/chrome/chrome.ini"] + BROWSER_CHROME_MANIFESTS += ["tests/browser/browser.ini"] + +UNIFIED_SOURCES += [ + "core/MicroGeckoProfiler.cpp", + "core/ProfileAdditionalInformation.cpp", + "core/ProfilerBindings.cpp", + "core/ProfilerThreadRegistration.cpp", + "core/ProfilerThreadRegistrationData.cpp", + "core/ProfilerThreadRegistry.cpp", + "core/ProfilerUtils.cpp", + "gecko/ProfilerParent.cpp", +] + +IPDL_SOURCES += [ + "gecko/PProfiler.ipdl", + "gecko/ProfilerTypes.ipdlh", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +EXPORTS += [ + "public/ChildProfilerController.h", + "public/GeckoProfiler.h", + "public/MicroGeckoProfiler.h", + "public/ProfileAdditionalInformation.h", + "public/ProfilerBindings.h", + "public/ProfilerControl.h", + "public/ProfilerParent.h", + "public/ProfilerRustBindings.h", +] + +EXPORTS.mozilla += [ + "public/ProfileBufferEntrySerializationGeckoExtensions.h", + "public/ProfileJSONWriter.h", + "public/ProfilerCounts.h", + "public/ProfilerLabels.h", + "public/ProfilerMarkers.h", + "public/ProfilerMarkersDetail.h", + "public/ProfilerMarkersPrerequisites.h", + "public/ProfilerMarkerTypes.h", + "public/ProfilerRunnable.h", + "public/ProfilerState.h", + "public/ProfilerThreadPlatformData.h", + "public/ProfilerThreadRegistration.h", + "public/ProfilerThreadRegistrationData.h", + "public/ProfilerThreadRegistrationInfo.h", + "public/ProfilerThreadRegistry.h", + "public/ProfilerThreadSleep.h", + "public/ProfilerThreadState.h", + "public/ProfilerUtils.h", +] + +GeneratedFile( + "rust-api/src/gecko_bindings/profiling_categories.rs", + script="../../mozglue/baseprofiler/build/generate_profiling_categories.py", + entry_point="generate_rust_enums", + inputs=["../../mozglue/baseprofiler/build/profiling_categories.yaml"], +) + 
+CONFIGURE_SUBST_FILES += [ + "rust-api/extra-bindgen-flags", +] + + +if CONFIG["COMPILE_ENVIRONMENT"]: + CbindgenHeader("profiler_ffi_generated.h", inputs=["rust-api"]) + + EXPORTS.mozilla += [ + "!profiler_ffi_generated.h", + ] + +USE_LIBS += [ + "jsoncpp", +] + +FINAL_LIBRARY = "xul" + +if CONFIG["ENABLE_TESTS"]: + DIRS += ["tests/gtest"] + +if CONFIG["CC_TYPE"] in ("clang", "gcc"): + CXXFLAGS += [ + "-Wno-error=stack-protector", + "-Wno-ignored-qualifiers", # due to use of breakpad headers + ] + +with Files("**"): + BUG_COMPONENT = ("Core", "Gecko Profiler") diff --git a/tools/profiler/public/ChildProfilerController.h b/tools/profiler/public/ChildProfilerController.h new file mode 100644 index 0000000000..8febc25b65 --- /dev/null +++ b/tools/profiler/public/ChildProfilerController.h @@ -0,0 +1,71 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ChildProfilerController_h +#define ChildProfilerController_h + +#include "base/process.h" +#include "mozilla/Attributes.h" +#include "mozilla/ipc/ProtocolUtils.h" +#include "mozilla/DataMutex.h" +#include "mozilla/RefPtr.h" +#include "nsISupportsImpl.h" +#include "nsStringFwd.h" +#include "ProfileAdditionalInformation.h" + +namespace mozilla { + +class ProfilerChild; +class PProfilerChild; +class PProfilerParent; + +// ChildProfilerController manages the setup and teardown of ProfilerChild. +// It's used on the main thread. +// It manages a background thread that ProfilerChild runs on. 
+class ChildProfilerController final { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ChildProfilerController) + +#ifdef MOZ_GECKO_PROFILER + static already_AddRefed Create( + mozilla::ipc::Endpoint&& aEndpoint); + + [[nodiscard]] ProfileAndAdditionalInformation + GrabShutdownProfileAndShutdown(); + void Shutdown(); + + private: + ChildProfilerController(); + ~ChildProfilerController(); + void Init(mozilla::ipc::Endpoint&& aEndpoint); + void ShutdownAndMaybeGrabShutdownProfileFirst( + ProfileAndAdditionalInformation* aOutShutdownProfileInformation); + + // Called on mThread: + void SetupProfilerChild(mozilla::ipc::Endpoint&& aEndpoint); + void ShutdownProfilerChild( + ProfileAndAdditionalInformation* aOutShutdownProfileInformation); + + RefPtr mProfilerChild; // only accessed on mThread + DataMutex> mThread; +#else + static already_AddRefed Create( + mozilla::ipc::Endpoint&& aEndpoint) { + return nullptr; + } + [[nodiscard]] ProfileAndAdditionalInformation + GrabShutdownProfileAndShutdown() { + return ProfileAndAdditionalInformation(std::move(EmptyCString())); + } + void Shutdown() {} + + private: + ~ChildProfilerController() {} +#endif // MOZ_GECKO_PROFILER +}; + +} // namespace mozilla + +#endif // ChildProfilerController_h diff --git a/tools/profiler/public/GeckoProfiler.h b/tools/profiler/public/GeckoProfiler.h new file mode 100644 index 0000000000..f7c045297e --- /dev/null +++ b/tools/profiler/public/GeckoProfiler.h @@ -0,0 +1,435 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// The Gecko Profiler is an always-on profiler that takes fast and low overhead +// samples of the program execution using only userspace functionality for +// portability. 
The goal of this module is to provide performance data in a +// generic cross-platform way without requiring custom tools or kernel support. +// +// Samples are collected to form a timeline with optional timeline event +// (markers) used for filtering. The samples include both native stacks and +// platform-independent "label stack" frames. + +#ifndef GeckoProfiler_h +#define GeckoProfiler_h + +// Everything in here is also safe to include unconditionally, and only defines +// empty macros if MOZ_GECKO_PROFILER is unset. +// If your file only uses particular APIs (e.g., only markers), please consider +// including only the needed headers instead of this one, to reduce compilation +// dependencies. +#include "BaseProfiler.h" +#include "ProfileAdditionalInformation.h" +#include "mozilla/ProfilerCounts.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/ProfilerMarkers.h" +#include "mozilla/ProfilerState.h" +#include "mozilla/ProfilerThreadSleep.h" +#include "mozilla/ProfilerThreadState.h" +#include "mozilla/ProgressLogger.h" +#include "mozilla/Result.h" +#include "mozilla/ResultVariant.h" + +#ifndef MOZ_GECKO_PROFILER + +# include "mozilla/UniquePtr.h" + +// This file can be #included unconditionally. However, everything within this +// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the +// following macros and functions, which encapsulate the most common operations +// and thus avoid the need for many #ifdefs. + +# define PROFILER_REGISTER_THREAD(name) +# define PROFILER_UNREGISTER_THREAD() +# define AUTO_PROFILER_REGISTER_THREAD(name) + +# define PROFILER_JS_INTERRUPT_CALLBACK() + +# define PROFILER_SET_JS_CONTEXT(cx) +# define PROFILER_CLEAR_JS_CONTEXT() + +// Function stubs for when MOZ_GECKO_PROFILER is not defined. + +// This won't be used, it's just there to allow the empty definition of +// `profiler_get_backtrace`. 
+struct ProfilerBacktrace {}; +using UniqueProfilerBacktrace = mozilla::UniquePtr; + +// Get/Capture-backtrace functions can return nullptr or false, the result +// should be fed to another empty macro or stub anyway. + +static inline UniqueProfilerBacktrace profiler_get_backtrace() { + return nullptr; +} + +// This won't be used, it's just there to allow the empty definitions of +// `profiler_capture_backtrace_into` and `profiler_capture_backtrace`. +struct ProfileChunkedBuffer {}; + +static inline bool profiler_capture_backtrace_into( + mozilla::ProfileChunkedBuffer& aChunkedBuffer, + mozilla::StackCaptureOptions aCaptureOptions) { + return false; +} +static inline mozilla::UniquePtr +profiler_capture_backtrace() { + return nullptr; +} + +static inline void profiler_set_process_name( + const nsACString& aProcessName, const nsACString* aETLDplus1 = nullptr) {} + +static inline void profiler_received_exit_profile( + const nsACString& aExitProfile) {} + +static inline void profiler_register_page(uint64_t aTabID, + uint64_t aInnerWindowID, + const nsCString& aUrl, + uint64_t aEmbedderInnerWindowID, + bool aIsPrivateBrowsing) {} +static inline void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) { +} + +static inline void GetProfilerEnvVarsForChildProcess( + std::function&& aSetEnv) {} + +static inline void profiler_record_wakeup_count( + const nsACString& aProcessType) {} + +#else // !MOZ_GECKO_PROFILER + +# include "js/ProfilingStack.h" +# include "mozilla/Assertions.h" +# include "mozilla/Atomics.h" +# include "mozilla/Attributes.h" +# include "mozilla/BaseProfilerRAIIMacro.h" +# include "mozilla/Maybe.h" +# include "mozilla/PowerOfTwo.h" +# include "mozilla/ThreadLocal.h" +# include "mozilla/TimeStamp.h" +# include "mozilla/UniquePtr.h" +# include "nscore.h" +# include "nsINamed.h" +# include "nsString.h" +# include "nsThreadUtils.h" + +# include +# include + +class ProfilerBacktrace; +class ProfilerCodeAddressService; +struct JSContext; + +namespace 
mozilla { +class ProfileBufferControlledChunkManager; +class ProfileChunkedBuffer; +namespace baseprofiler { +class SpliceableJSONWriter; +} // namespace baseprofiler +} // namespace mozilla +class nsIURI; + +enum class ProfilerError { + IsInactive, + JsonGenerationFailed, +}; + +template +using ProfilerResult = mozilla::Result; + +//--------------------------------------------------------------------------- +// Give information to the profiler +//--------------------------------------------------------------------------- + +// Register/unregister threads with the profiler. Both functions operate the +// same whether the profiler is active or inactive. +# define PROFILER_REGISTER_THREAD(name) \ + do { \ + char stackTop; \ + profiler_register_thread(name, &stackTop); \ + } while (0) +# define PROFILER_UNREGISTER_THREAD() profiler_unregister_thread() +ProfilingStack* profiler_register_thread(const char* name, void* guessStackTop); +void profiler_unregister_thread(); + +// Registers a DOM Window (the JS global `window`) with the profiler. Each +// Window _roughly_ corresponds to a single document loaded within a +// browsing context. Both the Window Id and Browser Id are recorded to allow +// correlating different Windows loaded within the same tab or frame element. +// +// We register pages for each navigation, but we do not register +// history.pushState or history.replaceState since they correspond to the same +// Inner Window ID. When a browsing context is first loaded, the first url +// loaded in it will be about:blank. Because of that, this call keeps the first +// non-about:blank registration of window and discards the previous one. +// +// "aTabID" is the BrowserId that the document belongs to. +// That's used to determine the tab of that page. +// "aInnerWindowID" is the ID of the `window` global object of that +// document. +// "aUrl" is the URL of the page. +// "aEmbedderInnerWindowID" is the inner window id of embedder.
It's used to +// determine sub documents of a page. +// "aIsPrivateBrowsing" is true if this browsing context happens in a +// private browsing context. +void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID, + const nsCString& aUrl, + uint64_t aEmbedderInnerWindowID, + bool aIsPrivateBrowsing); +// Unregister page with the profiler. +// +// Takes an Inner Window ID and unregisters the page entry that has the same ID. +void profiler_unregister_page(uint64_t aRegisteredInnerWindowID); + +// Remove all registered and unregistered pages in the profiler. +void profiler_clear_all_pages(); + +class BaseProfilerCount; +void profiler_add_sampled_counter(BaseProfilerCount* aCounter); +void profiler_remove_sampled_counter(BaseProfilerCount* aCounter); + +// Register and unregister a thread within a scope. +# define AUTO_PROFILER_REGISTER_THREAD(name) \ + mozilla::AutoProfilerRegisterThread PROFILER_RAII(name) + +enum class SamplingState { + JustStopped, // Sampling loop has just stopped without sampling, between the + // callback registration and now. + SamplingPaused, // Profiler is active but sampling loop has gone through a + // pause. + NoStackSamplingCompleted, // A full sampling loop has completed in + // no-stack-sampling mode. + SamplingCompleted // A full sampling loop has completed. +}; + +using PostSamplingCallback = std::function; + +// Install a callback to be invoked at the end of the next sampling loop. +// - `false` if profiler is not active, `aCallback` will stay untouched. +// - `true` if `aCallback` was successfully moved-from into internal storage, +// and *will* be invoked at the end of the next sampling cycle. Note that this +// will happen on the Sampler thread, and will block further sampling, so +// please be mindful not to block for a long time (e.g., just dispatch a +// runnable to another thread.) Calling profiler functions from the callback +// is allowed.
+[[nodiscard]] bool profiler_callback_after_sampling( + PostSamplingCallback&& aCallback); + +// Called by the JSRuntime's operation callback. This is used to start profiling +// on auxiliary threads. Operates the same whether the profiler is active or +// not. +# define PROFILER_JS_INTERRUPT_CALLBACK() profiler_js_interrupt_callback() +void profiler_js_interrupt_callback(); + +// Set and clear the current thread's JSContext. +# define PROFILER_SET_JS_CONTEXT(cx) profiler_set_js_context(cx) +# define PROFILER_CLEAR_JS_CONTEXT() profiler_clear_js_context() +void profiler_set_js_context(JSContext* aCx); +void profiler_clear_js_context(); + +//--------------------------------------------------------------------------- +// Get information from the profiler +//--------------------------------------------------------------------------- + +// Get the chunk manager used in the current profiling session, or null. +mozilla::ProfileBufferControlledChunkManager* +profiler_get_controlled_chunk_manager(); + +// The number of milliseconds since the process started. Operates the same +// whether the profiler is active or inactive. +double profiler_time(); + +// An object of this class is passed to profiler_suspend_and_sample_thread(). +// For each stack frame, one of the Collect methods will be called. +class ProfilerStackCollector { + public: + // Some collectors need to worry about possibly overwriting previous + // generations of data. If that's not an issue, this can return Nothing, + // which is the default behaviour. + virtual mozilla::Maybe SamplePositionInBuffer() { + return mozilla::Nothing(); + } + virtual mozilla::Maybe BufferRangeStart() { + return mozilla::Nothing(); + } + + // This method will be called once if the thread being suspended is the main + // thread. Default behaviour is to do nothing. + virtual void SetIsMainThread() {} + + // WARNING: The target thread is suspended when the Collect methods are + // called. 
Do not try to allocate or acquire any locks, or you could + // deadlock. The target thread will have resumed by the time this function + // returns. + + virtual void CollectNativeLeafAddr(void* aAddr) = 0; + + virtual void CollectJitReturnAddr(void* aAddr) = 0; + + virtual void CollectWasmFrame(const char* aLabel) = 0; + + virtual void CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) = 0; +}; + +// This method suspends the thread identified by aThreadId, samples its +// profiling stack, JS stack, and (optionally) native stack, passing the +// collected frames into aCollector. aFeatures dictates which compiler features +// are used. |Leaf| is the only relevant one. +// Use `ProfilerThreadId{}` (unspecified) to sample the current thread. +void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId, + uint32_t aFeatures, + ProfilerStackCollector& aCollector, + bool aSampleNative = true); + +struct ProfilerBacktraceDestructor { + void operator()(ProfilerBacktrace*); +}; + +using UniqueProfilerBacktrace = + mozilla::UniquePtr; + +// Immediately capture the current thread's call stack, store it in the provided +// buffer (usually to avoid allocations if you can construct the buffer on the +// stack). Returns false if unsuccessful, or if the profiler is inactive. +bool profiler_capture_backtrace_into( + mozilla::ProfileChunkedBuffer& aChunkedBuffer, + mozilla::StackCaptureOptions aCaptureOptions); + +// Immediately capture the current thread's call stack, and return it in a +// ProfileChunkedBuffer (usually for later use in MarkerStack::TakeBacktrace()). +// May be null if unsuccessful, or if the profiler is inactive. +mozilla::UniquePtr profiler_capture_backtrace(); + +// Immediately capture the current thread's call stack, and return it in a +// ProfilerBacktrace (usually for later use in marker function that take a +// ProfilerBacktrace). May be null if unsuccessful, or if the profiler is +// inactive. 
+UniqueProfilerBacktrace profiler_get_backtrace(); + +struct ProfilerStats { + unsigned n = 0; + double sum = 0; + double min = std::numeric_limits::max(); + double max = 0; + void Count(double v) { + ++n; + sum += v; + if (v < min) { + min = v; + } + if (v > max) { + max = v; + } + } +}; + +struct ProfilerBufferInfo { + // Index of the oldest entry. + uint64_t mRangeStart; + // Index of the newest entry. + uint64_t mRangeEnd; + // Buffer capacity in number of 8-byte entries. + uint32_t mEntryCount; + // Sampling stats: Interval between successive samplings. + ProfilerStats mIntervalsUs; + // Sampling stats: Total sampling duration. (Split detail below.) + ProfilerStats mOverheadsUs; + // Sampling stats: Time to acquire the lock before sampling. + ProfilerStats mLockingsUs; + // Sampling stats: Time to discard expired data. + ProfilerStats mCleaningsUs; + // Sampling stats: Time to collect counter data. + ProfilerStats mCountersUs; + // Sampling stats: Time to sample thread stacks. + ProfilerStats mThreadsUs; +}; + +// Get information about the current buffer status. +// Returns Nothing() if the profiler is inactive. +// +// This information may be useful to a user-interface displaying the current +// status of the profiler, allowing the user to get a sense for how fast the +// buffer is being written to, and how much data is visible. +mozilla::Maybe profiler_get_buffer_info(); + +// Record through glean how many times profiler_thread_wake has been +// called. +void profiler_record_wakeup_count(const nsACString& aProcessType); + +//--------------------------------------------------------------------------- +// Output profiles +//--------------------------------------------------------------------------- + +// Set a user-friendly process name, used in JSON stream. 
Allows an optional +// detailed name which may include private info (eTLD+1 in fission) +void profiler_set_process_name(const nsACString& aProcessName, + const nsACString* aETLDplus1 = nullptr); + +// Record an exit profile from a child process. +void profiler_received_exit_profile(const nsACString& aExitProfile); + +// Get the profile encoded as a JSON string. A no-op (returning nullptr) if the +// profiler is inactive. +// If aIsShuttingDown is true, the current time is included as the process +// shutdown time in the JSON's "meta" object. +mozilla::UniquePtr profiler_get_profile(double aSinceTime = 0, + bool aIsShuttingDown = false); + +// Write the profile for this process (excluding subprocesses) into aWriter. +// Returns a failed result if the profiler is inactive. +ProfilerResult +profiler_stream_json_for_this_process( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, double aSinceTime = 0, + bool aIsShuttingDown = false, + ProfilerCodeAddressService* aService = nullptr, + mozilla::ProgressLogger aProgressLogger = {}); + +// Get the profile and write it into a file. A no-op if the profiler is +// inactive. +// +// This function is 'extern "C"' so that it is easily callable from a debugger +// in a build without debugging information (a workaround for +// http://llvm.org/bugs/show_bug.cgi?id=22211). +extern "C" { +void profiler_save_profile_to_file(const char* aFilename); +} + +//--------------------------------------------------------------------------- +// RAII classes +//--------------------------------------------------------------------------- + +namespace mozilla { + +// Convenience class to register and unregister a thread with the profiler. +// Needs to be the first object on the stack of the thread.
+class MOZ_RAII AutoProfilerRegisterThread final { + public: + explicit AutoProfilerRegisterThread(const char* aName) { + profiler_register_thread(aName, this); + } + + ~AutoProfilerRegisterThread() { profiler_unregister_thread(); } + + private: + AutoProfilerRegisterThread(const AutoProfilerRegisterThread&) = delete; + AutoProfilerRegisterThread& operator=(const AutoProfilerRegisterThread&) = + delete; +}; + +// Get the MOZ_PROFILER_STARTUP* environment variables that should be +// supplied to a child process that is about to be launched, in order +// to make that child process start with the same profiler settings as +// in the current process. The given function is invoked once for +// each variable to be set. +void GetProfilerEnvVarsForChildProcess( + std::function&& aSetEnv); + +} // namespace mozilla + +#endif // !MOZ_GECKO_PROFILER + +#endif // GeckoProfiler_h diff --git a/tools/profiler/public/GeckoProfilerReporter.h b/tools/profiler/public/GeckoProfilerReporter.h new file mode 100644 index 0000000000..f5bf41f223 --- /dev/null +++ b/tools/profiler/public/GeckoProfilerReporter.h @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef GeckoProfilerReporter_h +#define GeckoProfilerReporter_h + +#include "nsIMemoryReporter.h" + +class GeckoProfilerReporter final : public nsIMemoryReporter { + public: + NS_DECL_ISUPPORTS + + GeckoProfilerReporter() {} + + NS_IMETHOD + CollectReports(nsIHandleReportCallback* aHandleReport, nsISupports* aData, + bool aAnonymize) override; + + private: + ~GeckoProfilerReporter() {} +}; + +#endif diff --git a/tools/profiler/public/GeckoTraceEvent.h b/tools/profiler/public/GeckoTraceEvent.h new file mode 100644 index 0000000000..75affaf9c8 --- /dev/null +++ b/tools/profiler/public/GeckoTraceEvent.h @@ -0,0 +1,1060 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file under third_party_mods/chromium or at: +// http://src.chromium.org/svn/trunk/src/LICENSE + +#ifndef GECKO_TRACE_EVENT_H_ +#define GECKO_TRACE_EVENT_H_ + +#include "MicroGeckoProfiler.h" + +// Extracted from Chromium's src/base/debug/trace_event.h, modified to talk to +// the Gecko profiler. + +#if defined(RTC_DISABLE_TRACE_EVENTS) +# define RTC_TRACE_EVENTS_ENABLED 0 +#else +# define RTC_TRACE_EVENTS_ENABLED 1 +#endif + +// Type values for identifying types in the TraceValue union. +#define TRACE_VALUE_TYPE_BOOL (static_cast(1)) +#define TRACE_VALUE_TYPE_UINT (static_cast(2)) +#define TRACE_VALUE_TYPE_INT (static_cast(3)) +#define TRACE_VALUE_TYPE_DOUBLE (static_cast(4)) +#define TRACE_VALUE_TYPE_POINTER (static_cast(5)) +#define TRACE_VALUE_TYPE_STRING (static_cast(6)) +#define TRACE_VALUE_TYPE_COPY_STRING (static_cast(7)) + +#if RTC_TRACE_EVENTS_ENABLED + +// This header is designed to give you trace_event macros without specifying +// how the events actually get collected and stored. 
If you need to expose trace +// event to some other universe, you can copy-and-paste this file, +// implement the TRACE_EVENT_API macros, and do any other necessary fixup for +// the target platform. The end result is that multiple libraries can funnel +// events through to a shared trace event collector. + +// Trace events are for tracking application performance and resource usage. +// Macros are provided to track: +// Begin and end of function calls +// Counters +// +// Events are issued against categories. Whereas RTC_LOG's +// categories are statically defined, TRACE categories are created +// implicitly with a string. For example: +// TRACE_EVENT_INSTANT0("MY_SUBSYSTEM", "SomeImportantEvent") +// +// Events can be INSTANT, or can be pairs of BEGIN and END in the same scope: +// TRACE_EVENT_BEGIN0("MY_SUBSYSTEM", "SomethingCostly") +// doSomethingCostly() +// TRACE_EVENT_END0("MY_SUBSYSTEM", "SomethingCostly") +// Note: our tools can't always determine the correct BEGIN/END pairs unless +// these are used in the same scope. Use ASYNC_BEGIN/ASYNC_END macros if you +// need them to be in separate scopes. +// +// A common use case is to trace entire function scopes. This +// issues a trace BEGIN and END automatically: +// void doSomethingCostly() { +// TRACE_EVENT0("MY_SUBSYSTEM", "doSomethingCostly"); +// ... +// } +// +// Additional parameters can be associated with an event: +// void doSomethingCostly2(int howMuch) { +// TRACE_EVENT1("MY_SUBSYSTEM", "doSomethingCostly", +// "howMuch", howMuch); +// ... +// } +// +// The trace system will automatically add to this information the +// current process id, thread id, and a timestamp in microseconds. 
+// +// To trace an asynchronous procedure such as an IPC send/receive, use +// ASYNC_BEGIN and ASYNC_END: +// [single threaded sender code] +// static int send_count = 0; +// ++send_count; +// TRACE_EVENT_ASYNC_BEGIN0("ipc", "message", send_count); +// Send(new MyMessage(send_count)); +// [receive code] +// void OnMyMessage(send_count) { +// TRACE_EVENT_ASYNC_END0("ipc", "message", send_count); +// } +// The third parameter is a unique ID to match ASYNC_BEGIN/ASYNC_END pairs. +// ASYNC_BEGIN and ASYNC_END can occur on any thread of any traced process. +// Pointers can be used for the ID parameter, and they will be mangled +// internally so that the same pointer on two different processes will not +// match. For example: +// class MyTracedClass { +// public: +// MyTracedClass() { +// TRACE_EVENT_ASYNC_BEGIN0("category", "MyTracedClass", this); +// } +// ~MyTracedClass() { +// TRACE_EVENT_ASYNC_END0("category", "MyTracedClass", this); +// } +// } +// +// Trace event also supports counters, which is a way to track a quantity +// as it varies over time. Counters are created with the following macro: +// TRACE_COUNTER1("MY_SUBSYSTEM", "myCounter", g_myCounterValue); +// +// Counters are process-specific. The macro itself can be issued from any +// thread, however. +// +// Sometimes, you want to track two counters at once. You can do this with two +// counter macros: +// TRACE_COUNTER1("MY_SUBSYSTEM", "myCounter0", g_myCounterValue[0]); +// TRACE_COUNTER1("MY_SUBSYSTEM", "myCounter1", g_myCounterValue[1]); +// Or you can do it with a combined macro: +// TRACE_COUNTER2("MY_SUBSYSTEM", "myCounter", +// "bytesPinned", g_myCounterValue[0], +// "bytesAllocated", g_myCounterValue[1]); +// This indicates to the tracing UI that these counters should be displayed +// in a single graph, as a summed area chart. +// +// Since counters are in a global namespace, you may want to disambiguate with a +// unique ID, by using the TRACE_COUNTER_ID* variations.
+// +// By default, trace collection is compiled in, but turned off at runtime. +// Collecting trace data is the responsibility of the embedding +// application. In Chrome's case, navigating to about:tracing will turn on +// tracing and display data collected across all active processes. +// +// +// Memory scoping note: +// Tracing copies the pointers, not the string content, of the strings passed +// in for category, name, and arg_names. Thus, the following code will +// cause problems: +// char* str = strdup("importantName"); +// TRACE_EVENT_INSTANT0("SUBSYSTEM", str); // BAD! +// free(str); // Trace system now has dangling pointer +// +// To avoid this issue with the `name` and `arg_name` parameters, use the +// TRACE_EVENT_COPY_XXX overloads of the macros at additional runtime overhead. +// Notes: The category must always be in a long-lived char* (i.e. static const). +// The `arg_values`, when used, are always deep copied with the _COPY +// macros. +// +// When are string argument values copied: +// const char* arg_values are only referenced by default: +// TRACE_EVENT1("category", "name", +// "arg1", "literal string is only referenced"); +// Use TRACE_STR_COPY to force copying of a const char*: +// TRACE_EVENT1("category", "name", +// "arg1", TRACE_STR_COPY("string will be copied")); +// std::string arg_values are always copied: +// TRACE_EVENT1("category", "name", +// "arg1", std::string("string will be copied")); +// +// +// Thread Safety: +// Thread safety is provided by methods defined in event_tracer.h. See the file +// for details. + +// By default, const char* argument values are assumed to have long-lived scope +// and will not be copied. Use this macro to force a const char* to be copied. +# define TRACE_STR_COPY(str) \ + webrtc::trace_event_internal::TraceStringWithCopy(str) + +// This will mark the trace event as disabled by default. The user will need +// to explicitly enable the event.
#  define TRACE_DISABLED_BY_DEFAULT(name) "disabled-by-default-" name

// Integer (uint64) IDs handed to the TRACE_EVENT_ASYNC macros are not mangled
// with the process ID by default. Wrap the ID in this macro to force mangling.
#  define TRACE_ID_MANGLE(id) \
    webrtc::trace_event_internal::TraceID::ForceMangle(id)

// Scoped events: record a begin/end pair called "name" for the current scope,
// with 0, 1 or 2 associated arguments. Nothing happens if the category is not
// enabled.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
#  define TRACE_EVENT0(category, name) \
    INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name)
#  define TRACE_EVENT1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, arg1_name, arg1_val)
#  define TRACE_EVENT2(category, name, arg1_name, arg1_val, arg2_name, \
                       arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, arg1_name, arg1_val, \
                                    arg2_name, arg2_val)

// Instant events: record a single event called "name" immediately, with 0, 1
// or 2 associated arguments. Nothing happens if the category is not enabled.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
#  define TRACE_EVENT_INSTANT0(category, name) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
                             TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_INSTANT1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val)
#  define TRACE_EVENT_INSTANT2(category, name, arg1_name, arg1_val, arg2_name, \
                               arg2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val, \
                             arg2_name, arg2_val)
// The _COPY_ forms are identical except that they pass TRACE_EVENT_FLAG_COPY.
#  define TRACE_EVENT_COPY_INSTANT0(category, name) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
                             TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_INSTANT1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_INSTANT2(category, name, arg1_name, arg1_val, \
                                    arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val, \
                             arg2_name, arg2_val)

// BEGIN events: record a single BEGIN event called "name" immediately, with
// 0, 1 or 2 associated arguments. Nothing happens if the category is not
// enabled.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
#  define TRACE_EVENT_BEGIN0(category, name) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
                             TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_BEGIN1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val)
#  define TRACE_EVENT_BEGIN2(category, name, arg1_name, arg1_val, arg2_name, \
                             arg2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val, \
                             arg2_name, arg2_val)
// The _COPY_ forms are identical except that they pass TRACE_EVENT_FLAG_COPY.
#  define TRACE_EVENT_COPY_BEGIN0(category, name) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
                             TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_BEGIN1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_BEGIN2(category, name, arg1_name, arg1_val, \
                                  arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val, \
                             arg2_name, arg2_val)

// END events: record a single END event for "name" immediately. Nothing
// happens if the category is not enabled.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
#  define TRACE_EVENT_END0(category, name) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
                             TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_END1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val)
#  define TRACE_EVENT_END2(category, name, arg1_name, arg1_val, arg2_name, \
                           arg2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val, \
                             arg2_name, arg2_val)
#  define TRACE_EVENT_COPY_END0(category, name) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
                             TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_END1(category, name, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_END2(category, name, arg1_name, arg1_val, \
                                arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val, \
                             arg2_name, arg2_val)

// Records the value of a counter called "name" immediately. Value
// must be representable as a 32 bit integer (it is converted with
// static_cast<int> before being recorded under the argument name "value").
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
#  define TRACE_COUNTER1(category, name, value) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
                             TRACE_EVENT_FLAG_NONE, "value", \
                             static_cast<int>(value))
#  define TRACE_COPY_COUNTER1(category, name, value) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
                             TRACE_EVENT_FLAG_COPY, "value", \
                             static_cast<int>(value))

// Records the values of a multi-parted counter called "name" immediately.
// The UI will treat value1 and value2 as parts of a whole, displaying their
// values as a stacked-bar chart.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
#  define TRACE_COUNTER2(category, name, value1_name, value1_val, value2_name, \
                         value2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
                             TRACE_EVENT_FLAG_NONE, value1_name, \
                             static_cast<int>(value1_val), value2_name, \
                             static_cast<int>(value2_val))
#  define TRACE_COPY_COUNTER2(category, name, value1_name, value1_val, \
                              value2_name, value2_val) \
    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
                             TRACE_EVENT_FLAG_COPY, value1_name, \
                             static_cast<int>(value1_val), value2_name, \
                             static_cast<int>(value2_val))

// Records the value of a counter called "name" immediately. Value
// must be representable as a 32 bit integer.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
// - `id` is used to disambiguate counters with the same name. It must either
//   be a pointer or an integer value up to 64 bits. If it's a pointer, the bits
//   will be xored with a hash of the process ID so that the same pointer on
//   two different processes will not collide.
#  define TRACE_COUNTER_ID1(category, name, id, value) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_COUNTER, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, "value", \
                                     static_cast<int>(value))
#  define TRACE_COPY_COUNTER_ID1(category, name, id, value) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_COUNTER, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, "value", \
                                     static_cast<int>(value))

// Records the values of a multi-parted counter called "name" immediately.
// The UI will treat value1 and value2 as parts of a whole, displaying their
// values as a stacked-bar chart.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
// - `id` is used to disambiguate counters with the same name. It must either
//   be a pointer or an integer value up to 64 bits. If it's a pointer, the bits
//   will be xored with a hash of the process ID so that the same pointer on
//   two different processes will not collide.
#  define TRACE_COUNTER_ID2(category, name, id, value1_name, value1_val, \
                            value2_name, value2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID( \
        TRACE_EVENT_PHASE_COUNTER, category, name, id, TRACE_EVENT_FLAG_NONE, \
        value1_name, static_cast<int>(value1_val), value2_name, \
        static_cast<int>(value2_val))
#  define TRACE_COPY_COUNTER_ID2(category, name, id, value1_name, value1_val, \
                                 value2_name, value2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID( \
        TRACE_EVENT_PHASE_COUNTER, category, name, id, TRACE_EVENT_FLAG_COPY, \
        value1_name, static_cast<int>(value1_val), value2_name, \
        static_cast<int>(value2_val))

// Records a single ASYNC_BEGIN event called "name" immediately, with 0, 1 or 2
// associated arguments. If the category is not enabled, then this
// does nothing.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
// - `id` is used to match the ASYNC_BEGIN event with the ASYNC_END event. ASYNC
//   events are considered to match if their category, name and id values all
//   match. `id` must either be a pointer or an integer value up to 64 bits. If
//   it's a pointer, the bits will be xored with a hash of the process ID so
//   that the same pointer on two different processes will not collide.
// An asynchronous operation can consist of multiple phases. The first phase is
// defined by the ASYNC_BEGIN calls. Additional phases can be defined using the
// ASYNC_STEP macros. When the operation completes, call ASYNC_END.
// An ASYNC trace typically occurs on a single thread (if not, it will only be
// drawn on the thread defined in the ASYNC_BEGIN event), but all events in that
// operation must use the same `name` and `id`. Each event can have its own
// args.
#  define TRACE_EVENT_ASYNC_BEGIN0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_ASYNC_BEGIN1(category, name, id, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_ASYNC_BEGIN2(category, name, id, arg1_name, arg1_val, \
                                   arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)
// The _COPY_ forms are identical except that they pass TRACE_EVENT_FLAG_COPY.
#  define TRACE_EVENT_COPY_ASYNC_BEGIN0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_ASYNC_BEGIN1(category, name, id, arg1_name, \
                                        arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_ASYNC_BEGIN2(category, name, id, arg1_name, \
                                        arg1_val, arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)

// ASYNC_STEP: record a single ASYNC_STEP event for `step` immediately. Nothing
// happens if the category is not enabled. The `name` and `id` must match the
// ASYNC_BEGIN event above. The `step` param identifies this step within the
// async event. This should be called at the beginning of the next phase of an
// asynchronous operation.
#  define TRACE_EVENT_ASYNC_STEP0(category, name, id, step) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
                                     step)
#  define TRACE_EVENT_ASYNC_STEP1(category, name, id, step, arg1_name, \
                                  arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
                                     step, arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_ASYNC_STEP0(category, name, id, step) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
                                     step)
#  define TRACE_EVENT_COPY_ASYNC_STEP1(category, name, id, step, arg1_name, \
                                       arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
                                     step, arg1_name, arg1_val)

// ASYNC_END: record a single ASYNC_END event for "name" immediately. Nothing
// happens if the category is not enabled.
#  define TRACE_EVENT_ASYNC_END0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_ASYNC_END1(category, name, id, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_ASYNC_END2(category, name, id, arg1_name, arg1_val, \
                                 arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)
#  define TRACE_EVENT_COPY_ASYNC_END0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_ASYNC_END1(category, name, id, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_ASYNC_END2(category, name, id, arg1_name, arg1_val, \
                                      arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)

// FLOW_BEGIN: record a single FLOW_BEGIN event called "name" immediately, with
// 0, 1 or 2 associated arguments. Nothing happens if the category is not
// enabled.
// - category and name strings must have application lifetime (statics or
//   literals). They may not include " chars.
// - `id` is used to match the FLOW_BEGIN event with the FLOW_END event. FLOW
//   events are considered to match if their category, name and id values all
//   match. `id` must either be a pointer or an integer value up to 64 bits. If
//   it's a pointer, the bits will be xored with a hash of the process ID so
//   that the same pointer on two different processes will not collide.
// FLOW events are different from ASYNC events in how they are drawn by the
// tracing UI. A FLOW defines asynchronous data flow, such as posting a task
// (FLOW_BEGIN) and later executing that task (FLOW_END). Expect FLOWs to be
// drawn as lines or arrows from FLOW_BEGIN scopes to FLOW_END scopes. Similar
// to ASYNC, a FLOW can consist of multiple phases. The first phase is defined
// by the FLOW_BEGIN calls. Additional phases can be defined using the FLOW_STEP
// macros. When the operation completes, call FLOW_END. An async operation can
// span threads and processes, but all events in that operation must use the
// same `name` and `id`. Each event can have its own args.
#  define TRACE_EVENT_FLOW_BEGIN0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_FLOW_BEGIN1(category, name, id, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_FLOW_BEGIN2(category, name, id, arg1_name, arg1_val, \
                                  arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)
// The _COPY_ forms are identical except that they pass TRACE_EVENT_FLAG_COPY.
#  define TRACE_EVENT_COPY_FLOW_BEGIN0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_FLOW_BEGIN1(category, name, id, arg1_name, \
                                       arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_FLOW_BEGIN2(category, name, id, arg1_name, \
                                       arg1_val, arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)

// FLOW_STEP: record a single FLOW_STEP event for `step` immediately. Nothing
// happens if the category is not enabled. The `name` and `id` must match the
// FLOW_BEGIN event above. The `step` param identifies this step within the
// async event. This should be called at the beginning of the next phase of an
// asynchronous operation.
#  define TRACE_EVENT_FLOW_STEP0(category, name, id, step) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
                                     step)
#  define TRACE_EVENT_FLOW_STEP1(category, name, id, step, arg1_name, \
                                 arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
                                     step, arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_FLOW_STEP0(category, name, id, step) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
                                     step)
#  define TRACE_EVENT_COPY_FLOW_STEP1(category, name, id, step, arg1_name, \
                                      arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
                                     step, arg1_name, arg1_val)

// FLOW_END: record a single FLOW_END event for "name" immediately. Nothing
// happens if the category is not enabled.
#  define TRACE_EVENT_FLOW_END0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE)
#  define TRACE_EVENT_FLOW_END1(category, name, id, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_FLOW_END2(category, name, id, arg1_name, arg1_val, \
                                arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
                                     name, id, TRACE_EVENT_FLAG_NONE, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)
#  define TRACE_EVENT_COPY_FLOW_END0(category, name, id) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY)
#  define TRACE_EVENT_COPY_FLOW_END1(category, name, id, arg1_name, arg1_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val)
#  define TRACE_EVENT_COPY_FLOW_END2(category, name, id, arg1_name, arg1_val, \
                                     arg2_name, arg2_val) \
    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
                                     name, id, TRACE_EVENT_FLAG_COPY, \
                                     arg1_name, arg1_val, arg2_name, arg2_val)

////////////////////////////////////////////////////////////////////////////////
// Implementation specific tracing API definitions.

// Returns a pointer to the enabled state of the given trace category. Only
// long-lived literal strings should be given as the category name. The
// returned pointer can be held permanently, e.g. in a local static. A non-zero
// unsigned char means tracing is enabled, in which case
// TRACE_EVENT_API_ADD_TRACE_EVENT can be called. It is fine for tracing to be
// disabled between loading the state and calling
// TRACE_EVENT_API_ADD_TRACE_EVENT: the flag only provides an early out for
// best performance when tracing is disabled.
// const unsigned char*
//     TRACE_EVENT_API_GET_CATEGORY_ENABLED(const char* category_name)
#  define TRACE_EVENT_API_GET_CATEGORY_ENABLED \
    webrtc::EventTracer::GetCategoryEnabled

// Adds a trace event to the platform tracing system.
// void TRACE_EVENT_API_ADD_TRACE_EVENT(
//     char phase,
//     const unsigned char* category_enabled,
//     const char* name,
//     unsigned long long id,
//     int num_args,
//     const char** arg_names,
//     const unsigned char* arg_types,
//     const unsigned long long* arg_values,
//     unsigned char flags)
#  define TRACE_EVENT_API_ADD_TRACE_EVENT MOZ_INTERNAL_UPROFILER_SIMPLE_EVENT

////////////////////////////////////////////////////////////////////////////////

// Implementation detail: trace event macros create temporary variables
// to keep instrumentation overhead low. These macros give each temporary
// variable a unique name based on the line number to prevent name collisions.
#  define INTERNAL_TRACE_EVENT_UID3(a, b) trace_event_unique_##a##b
#  define INTERNAL_TRACE_EVENT_UID2(a, b) INTERNAL_TRACE_EVENT_UID3(a, b)
#  define INTERNAL_TRACE_EVENT_UID(name_prefix) \
    INTERNAL_TRACE_EVENT_UID2(name_prefix, __LINE__)

// Storage class of the per-call-site category pointer: a plain local when
// WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS is set, a function-local static
// otherwise.
#  if WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS
#    define INTERNAL_TRACE_EVENT_INFO_TYPE const unsigned char*
#  else
#    define INTERNAL_TRACE_EVENT_INFO_TYPE static const unsigned char*
#  endif  // WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS

// Implementation detail: internal macro to create static category.
// The category expression itself is reinterpreted as the
// `const unsigned char*` "enabled" pointer; the macros below dereference it
// to decide whether to record the event.
#  define INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category) \
    INTERNAL_TRACE_EVENT_INFO_TYPE INTERNAL_TRACE_EVENT_UID(catstatic) = \
        reinterpret_cast<const unsigned char*>(category);

// Implementation detail: internal macro to create static category and add
// event if the category is enabled.
#  define INTERNAL_TRACE_EVENT_ADD(phase, category, name, flags, ...) \
    do { \
      INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category); \
      if (*INTERNAL_TRACE_EVENT_UID(catstatic)) { \
        webrtc::trace_event_internal::AddTraceEvent( \
            phase, INTERNAL_TRACE_EVENT_UID(catstatic), name, \
            webrtc::trace_event_internal::kNoEventId, flags, ##__VA_ARGS__); \
      } \
    } while (0)

// Implementation detail: internal macro to create static category and add begin
// event if the category is enabled. Also adds the end event when the scope
// ends.
// Note: this expands to several statements (not a do/while block) because the
// TraceEndOnScopeClose object must live in the caller's scope.
#  define INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, ...) \
    INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category); \
    webrtc::trace_event_internal::TraceEndOnScopeClose \
        INTERNAL_TRACE_EVENT_UID(profileScope); \
    if (*INTERNAL_TRACE_EVENT_UID(catstatic)) { \
      webrtc::trace_event_internal::AddTraceEvent( \
          TRACE_EVENT_PHASE_BEGIN, INTERNAL_TRACE_EVENT_UID(catstatic), name, \
          webrtc::trace_event_internal::kNoEventId, TRACE_EVENT_FLAG_NONE, \
          ##__VA_ARGS__); \
      INTERNAL_TRACE_EVENT_UID(profileScope) \
          .Initialize(INTERNAL_TRACE_EVENT_UID(catstatic), name); \
    }

// Implementation detail: internal macro to create static category and add
// event if the category is enabled.
// TRACE_EVENT_FLAG_HAS_ID is always set; the TraceID constructor may
// additionally set TRACE_EVENT_FLAG_MANGLE_ID depending on the type of `id`.
#  define INTERNAL_TRACE_EVENT_ADD_WITH_ID(phase, category, name, id, flags, \
                                           ...) \
    do { \
      INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category); \
      if (*INTERNAL_TRACE_EVENT_UID(catstatic)) { \
        unsigned char trace_event_flags = flags | TRACE_EVENT_FLAG_HAS_ID; \
        webrtc::trace_event_internal::TraceID trace_event_trace_id( \
            id, &trace_event_flags); \
        webrtc::trace_event_internal::AddTraceEvent( \
            phase, INTERNAL_TRACE_EVENT_UID(catstatic), name, \
            trace_event_trace_id.data(), trace_event_flags, ##__VA_ARGS__); \
      } \
    } while (0)

// Sink for all trace events: forwards name, phase and arguments to
// uprofiler_simple_event_marker when MOZ_GECKO_PROFILER is defined, and expands
// to nothing otherwise. category_enabled, id and flags are not forwarded.
#  ifdef MOZ_GECKO_PROFILER
#    define MOZ_INTERNAL_UPROFILER_SIMPLE_EVENT(phase, category_enabled, name, \
                                                id, num_args, arg_names, \
                                                arg_types, arg_values, flags) \
      uprofiler_simple_event_marker(name, phase, num_args, arg_names, \
                                    arg_types, arg_values);
#  else
#    define MOZ_INTERNAL_UPROFILER_SIMPLE_EVENT(phase, category_enabled, name, \
                                                id, num_args, arg_names, \
                                                arg_types, arg_values, flags)
#  endif

// Notes regarding the following definitions:
// New values can be added and propagated to third party libraries, but existing
// definitions must never be changed, because third party libraries may use old
// definitions.

// Phase indicates the nature of an event entry. E.g. part of a begin/end pair.
+# define TRACE_EVENT_PHASE_BEGIN ('B') +# define TRACE_EVENT_PHASE_END ('E') +# define TRACE_EVENT_PHASE_INSTANT ('I') +# define TRACE_EVENT_PHASE_ASYNC_BEGIN ('S') +# define TRACE_EVENT_PHASE_ASYNC_STEP ('T') +# define TRACE_EVENT_PHASE_ASYNC_END ('F') +# define TRACE_EVENT_PHASE_FLOW_BEGIN ('s') +# define TRACE_EVENT_PHASE_FLOW_STEP ('t') +# define TRACE_EVENT_PHASE_FLOW_END ('f') +# define TRACE_EVENT_PHASE_METADATA ('M') +# define TRACE_EVENT_PHASE_COUNTER ('C') + +// Flags for changing the behavior of TRACE_EVENT_API_ADD_TRACE_EVENT. +# define TRACE_EVENT_FLAG_NONE (static_cast(0)) +# define TRACE_EVENT_FLAG_COPY (static_cast(1 << 0)) +# define TRACE_EVENT_FLAG_HAS_ID (static_cast(1 << 1)) +# define TRACE_EVENT_FLAG_MANGLE_ID (static_cast(1 << 2)) + +namespace webrtc { +namespace trace_event_internal { + +// Specify these values when the corresponding argument of AddTraceEvent is not +// used. +const int kZeroNumArgs = 0; +const unsigned long long kNoEventId = 0; + +// TraceID encapsulates an ID that can either be an integer or pointer. Pointers +// are mangled with the Process ID so that they are unlikely to collide when the +// same pointer is used on different processes. 
+class TraceID { + public: + class ForceMangle { + public: + explicit ForceMangle(unsigned long long id) : data_(id) {} + explicit ForceMangle(unsigned long id) : data_(id) {} + explicit ForceMangle(unsigned int id) : data_(id) {} + explicit ForceMangle(unsigned short id) : data_(id) {} + explicit ForceMangle(unsigned char id) : data_(id) {} + explicit ForceMangle(long long id) + : data_(static_cast(id)) {} + explicit ForceMangle(long id) + : data_(static_cast(id)) {} + explicit ForceMangle(int id) : data_(static_cast(id)) {} + explicit ForceMangle(short id) + : data_(static_cast(id)) {} + explicit ForceMangle(signed char id) + : data_(static_cast(id)) {} + + unsigned long long data() const { return data_; } + + private: + unsigned long long data_; + }; + + explicit TraceID(const void* id, unsigned char* flags) + : data_( + static_cast(reinterpret_cast(id))) { + *flags |= TRACE_EVENT_FLAG_MANGLE_ID; + } + explicit TraceID(ForceMangle id, unsigned char* flags) : data_(id.data()) { + *flags |= TRACE_EVENT_FLAG_MANGLE_ID; + } + explicit TraceID(unsigned long long id, unsigned char* flags) : data_(id) { + (void)flags; + } + explicit TraceID(unsigned long id, unsigned char* flags) : data_(id) { + (void)flags; + } + explicit TraceID(unsigned int id, unsigned char* flags) : data_(id) { + (void)flags; + } + explicit TraceID(unsigned short id, unsigned char* flags) : data_(id) { + (void)flags; + } + explicit TraceID(unsigned char id, unsigned char* flags) : data_(id) { + (void)flags; + } + explicit TraceID(long long id, unsigned char* flags) + : data_(static_cast(id)) { + (void)flags; + } + explicit TraceID(long id, unsigned char* flags) + : data_(static_cast(id)) { + (void)flags; + } + explicit TraceID(int id, unsigned char* flags) + : data_(static_cast(id)) { + (void)flags; + } + explicit TraceID(short id, unsigned char* flags) + : data_(static_cast(id)) { + (void)flags; + } + explicit TraceID(signed char id, unsigned char* flags) + : data_(static_cast(id)) { + 
(void)flags; + } + + unsigned long long data() const { return data_; } + + private: + unsigned long long data_; +}; + +// Simple union to store various types as unsigned long long. +union TraceValueUnion { + bool as_bool; + unsigned long long as_uint; + long long as_int; + double as_double; + const void* as_pointer; + const char* as_string; +}; + +// Simple container for const char* that should be copied instead of retained. +class TraceStringWithCopy { + public: + explicit TraceStringWithCopy(const char* str) : str_(str) {} + operator const char*() const { return str_; } + + private: + const char* str_; +}; + +// Define SetTraceValue for each allowed type. It stores the type and +// value in the return arguments. This allows this API to avoid declaring any +// structures so that it is portable to third_party libraries. +# define INTERNAL_DECLARE_SET_TRACE_VALUE(actual_type, union_member, \ + value_type_id) \ + static inline void SetTraceValue(actual_type arg, unsigned char* type, \ + unsigned long long* value) { \ + TraceValueUnion type_value; \ + type_value.union_member = arg; \ + *type = value_type_id; \ + *value = type_value.as_uint; \ + } +// Simpler form for int types that can be safely casted. 
+# define INTERNAL_DECLARE_SET_TRACE_VALUE_INT(actual_type, value_type_id) \ + static inline void SetTraceValue(actual_type arg, unsigned char* type, \ + unsigned long long* value) { \ + *type = value_type_id; \ + *value = static_cast(arg); \ + } + +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned long long, TRACE_VALUE_TYPE_UINT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned long, TRACE_VALUE_TYPE_UINT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned int, TRACE_VALUE_TYPE_UINT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned short, TRACE_VALUE_TYPE_UINT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned char, TRACE_VALUE_TYPE_UINT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(long long, TRACE_VALUE_TYPE_INT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(long, TRACE_VALUE_TYPE_INT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(int, TRACE_VALUE_TYPE_INT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(short, TRACE_VALUE_TYPE_INT) +INTERNAL_DECLARE_SET_TRACE_VALUE_INT(signed char, TRACE_VALUE_TYPE_INT) +INTERNAL_DECLARE_SET_TRACE_VALUE(bool, as_bool, TRACE_VALUE_TYPE_BOOL) +INTERNAL_DECLARE_SET_TRACE_VALUE(double, as_double, TRACE_VALUE_TYPE_DOUBLE) +INTERNAL_DECLARE_SET_TRACE_VALUE(const void*, as_pointer, + TRACE_VALUE_TYPE_POINTER) +INTERNAL_DECLARE_SET_TRACE_VALUE(const char*, as_string, + TRACE_VALUE_TYPE_STRING) +INTERNAL_DECLARE_SET_TRACE_VALUE(const TraceStringWithCopy&, as_string, + TRACE_VALUE_TYPE_COPY_STRING) + +# undef INTERNAL_DECLARE_SET_TRACE_VALUE +# undef INTERNAL_DECLARE_SET_TRACE_VALUE_INT + +// std::string version of SetTraceValue so that trace arguments can be strings. +static inline void SetTraceValue(const std::string& arg, unsigned char* type, + unsigned long long* value) { + TraceValueUnion type_value; + type_value.as_string = arg.c_str(); + *type = TRACE_VALUE_TYPE_COPY_STRING; + *value = type_value.as_uint; +} + +// These AddTraceEvent template functions are defined here instead of in the +// macro, because the arg_values could be temporary objects, such as +// std::string. 
In order to store pointers to the internal c_str and pass +// through to the tracing API, the arg_values must live throughout +// these procedures. + +static inline void AddTraceEvent(char phase, + const unsigned char* category_enabled, + const char* name, unsigned long long id, + unsigned char flags) { + TRACE_EVENT_API_ADD_TRACE_EVENT(phase, category_enabled, name, id, + kZeroNumArgs, nullptr, nullptr, nullptr, + flags); +} + +template +static inline void AddTraceEvent(char phase, + const unsigned char* category_enabled, + const char* name, unsigned long long id, + unsigned char flags, const char* arg1_name, + const ARG1_TYPE& arg1_val) { + const int num_args = 1; + unsigned char arg_types[1]; + unsigned long long arg_values[1]; + SetTraceValue(arg1_val, &arg_types[0], &arg_values[0]); + TRACE_EVENT_API_ADD_TRACE_EVENT(phase, category_enabled, name, id, num_args, + &arg1_name, arg_types, arg_values, flags); +} + +template +static inline void AddTraceEvent(char phase, + const unsigned char* category_enabled, + const char* name, unsigned long long id, + unsigned char flags, const char* arg1_name, + const ARG1_TYPE& arg1_val, + const char* arg2_name, + const ARG2_TYPE& arg2_val) { + const int num_args = 2; + const char* arg_names[2] = {arg1_name, arg2_name}; + unsigned char arg_types[2]; + unsigned long long arg_values[2]; + SetTraceValue(arg1_val, &arg_types[0], &arg_values[0]); + SetTraceValue(arg2_val, &arg_types[1], &arg_values[1]); + TRACE_EVENT_API_ADD_TRACE_EVENT(phase, category_enabled, name, id, num_args, + arg_names, arg_types, arg_values, flags); +} + +// Used by TRACE_EVENTx macro. Do not use directly. +class TraceEndOnScopeClose { + public: + // Note: members of data_ intentionally left uninitialized. See Initialize. 
+ TraceEndOnScopeClose() : p_data_(nullptr) {} + ~TraceEndOnScopeClose() { + if (p_data_) AddEventIfEnabled(); + } + + void Initialize(const unsigned char* category_enabled, const char* name) { + data_.category_enabled = category_enabled; + data_.name = name; + p_data_ = &data_; + } + + private: + // Add the end event if the category is still enabled. + void AddEventIfEnabled() { + // Only called when p_data_ is non-null. + if (*p_data_->category_enabled) { + TRACE_EVENT_API_ADD_TRACE_EVENT(TRACE_EVENT_PHASE_END, + p_data_->category_enabled, p_data_->name, + kNoEventId, kZeroNumArgs, nullptr, + nullptr, nullptr, TRACE_EVENT_FLAG_NONE); + } + } + + // This Data struct workaround is to avoid initializing all the members + // in Data during construction of this object, since this object is always + // constructed, even when tracing is disabled. If the members of Data were + // members of this class instead, compiler warnings occur about potential + // uninitialized accesses. + struct Data { + const unsigned char* category_enabled; + const char* name; + }; + Data* p_data_; + Data data_; +}; + +} // namespace trace_event_internal +} // namespace webrtc +#else + +//////////////////////////////////////////////////////////////////////////////// +// This section defines no-op alternatives to the tracing macros when +// RTC_DISABLE_TRACE_EVENTS is defined. 
+ +# define RTC_NOOP() \ + do { \ + } while (0) + +# define TRACE_STR_COPY(str) RTC_NOOP() + +# define TRACE_DISABLED_BY_DEFAULT(name) "disabled-by-default-" name + +# define TRACE_ID_MANGLE(id) 0 + +# define TRACE_EVENT0(category, name) RTC_NOOP() +# define TRACE_EVENT1(category, name, arg1_name, arg1_val) RTC_NOOP() +# define TRACE_EVENT2(category, name, arg1_name, arg1_val, arg2_name, \ + arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_INSTANT0(category, name) RTC_NOOP() +# define TRACE_EVENT_INSTANT1(category, name, arg1_name, arg1_val) RTC_NOOP() + +# define TRACE_EVENT_INSTANT2(category, name, arg1_name, arg1_val, arg2_name, \ + arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_COPY_INSTANT0(category, name) RTC_NOOP() +# define TRACE_EVENT_COPY_INSTANT1(category, name, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_INSTANT2(category, name, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_BEGIN0(category, name) RTC_NOOP() +# define TRACE_EVENT_BEGIN1(category, name, arg1_name, arg1_val) RTC_NOOP() +# define TRACE_EVENT_BEGIN2(category, name, arg1_name, arg1_val, arg2_name, \ + arg2_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_BEGIN0(category, name) RTC_NOOP() +# define TRACE_EVENT_COPY_BEGIN1(category, name, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_BEGIN2(category, name, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_END0(category, name) RTC_NOOP() +# define TRACE_EVENT_END1(category, name, arg1_name, arg1_val) RTC_NOOP() +# define TRACE_EVENT_END2(category, name, arg1_name, arg1_val, arg2_name, \ + arg2_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_END0(category, name) RTC_NOOP() +# define TRACE_EVENT_COPY_END1(category, name, arg1_name, arg1_val) RTC_NOOP() +# define TRACE_EVENT_COPY_END2(category, name, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_COUNTER1(category, name, value) RTC_NOOP() +# define 
TRACE_COPY_COUNTER1(category, name, value) RTC_NOOP() + +# define TRACE_COUNTER2(category, name, value1_name, value1_val, value2_name, \ + value2_val) \ + RTC_NOOP() +# define TRACE_COPY_COUNTER2(category, name, value1_name, value1_val, \ + value2_name, value2_val) \ + RTC_NOOP() + +# define TRACE_COUNTER_ID1(category, name, id, value) RTC_NOOP() +# define TRACE_COPY_COUNTER_ID1(category, name, id, value) RTC_NOOP() + +# define TRACE_COUNTER_ID2(category, name, id, value1_name, value1_val, \ + value2_name, value2_val) \ + RTC_NOOP() +# define TRACE_COPY_COUNTER_ID2(category, name, id, value1_name, value1_val, \ + value2_name, value2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_ASYNC_BEGIN0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_ASYNC_BEGIN1(category, name, id, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_ASYNC_BEGIN2(category, name, id, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_BEGIN0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_BEGIN1(category, name, id, arg1_name, \ + arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_BEGIN2(category, name, id, arg1_name, \ + arg1_val, arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_ASYNC_STEP0(category, name, id, step) RTC_NOOP() +# define TRACE_EVENT_ASYNC_STEP1(category, name, id, step, arg1_name, \ + arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_STEP0(category, name, id, step) RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_STEP1(category, name, id, step, arg1_name, \ + arg1_val) \ + RTC_NOOP() + +# define TRACE_EVENT_ASYNC_END0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_ASYNC_END1(category, name, id, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_ASYNC_END2(category, name, id, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_END0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_END1(category, name, id, arg1_name, arg1_val) 
\ + RTC_NOOP() +# define TRACE_EVENT_COPY_ASYNC_END2(category, name, id, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_FLOW_BEGIN0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_FLOW_BEGIN1(category, name, id, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_FLOW_BEGIN2(category, name, id, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_BEGIN0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_BEGIN1(category, name, id, arg1_name, \ + arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_BEGIN2(category, name, id, arg1_name, \ + arg1_val, arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_FLOW_STEP0(category, name, id, step) RTC_NOOP() +# define TRACE_EVENT_FLOW_STEP1(category, name, id, step, arg1_name, \ + arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_STEP0(category, name, id, step) RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_STEP1(category, name, id, step, arg1_name, \ + arg1_val) \ + RTC_NOOP() + +# define TRACE_EVENT_FLOW_END0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_FLOW_END1(category, name, id, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_FLOW_END2(category, name, id, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_END0(category, name, id) RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_END1(category, name, id, arg1_name, arg1_val) \ + RTC_NOOP() +# define TRACE_EVENT_COPY_FLOW_END2(category, name, id, arg1_name, arg1_val, \ + arg2_name, arg2_val) \ + RTC_NOOP() + +# define TRACE_EVENT_API_GET_CATEGORY_ENABLED "" + +# define TRACE_EVENT_API_ADD_TRACE_EVENT RTC_NOOP() + +#endif // RTC_TRACE_EVENTS_ENABLED + +#endif // GECKO_TRACE_EVENT_H_ diff --git a/tools/profiler/public/MicroGeckoProfiler.h b/tools/profiler/public/MicroGeckoProfiler.h new file mode 100644 index 0000000000..7b735e1eec --- /dev/null +++ b/tools/profiler/public/MicroGeckoProfiler.h @@ -0,0 +1,130 
@@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This contains things related to the Gecko profiler, for use in third_party +// code. It is very minimal and is designed to be used by patching over +// upstream code. +// Only use the C ABI and guard C++ code with #ifdefs, don't pull anything from +// Gecko, it must be possible to include the header file into any C++ codebase. + +#ifndef MICRO_GECKO_PROFILER +#define MICRO_GECKO_PROFILER + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#ifdef _WIN32 +# include +#else +# include +#endif + +extern MOZ_EXPORT void uprofiler_register_thread(const char* aName, + void* aGuessStackTop); + +extern MOZ_EXPORT void uprofiler_unregister_thread(); + +extern MOZ_EXPORT void uprofiler_simple_event_marker( + const char* name, char phase, int num_args, const char** arg_names, + const unsigned char* arg_types, const unsigned long long* arg_values); +#ifdef __cplusplus +} + +struct AutoRegisterProfiler { + AutoRegisterProfiler(const char* name, char* stacktop) { + if (getenv("MOZ_UPROFILER_LOG_THREAD_CREATION")) { + printf("### UProfiler: new thread: '%s'\n", name); + } + uprofiler_register_thread(name, stacktop); + } + ~AutoRegisterProfiler() { uprofiler_unregister_thread(); } +}; +#endif // __cplusplus + +void uprofiler_simple_event_marker(const char* name, char phase, int num_args, + const char** arg_names, + const unsigned char* arg_types, + const unsigned long long* arg_values); + +struct UprofilerFuncPtrs { + void (*register_thread)(const char* aName, void* aGuessStackTop); + void (*unregister_thread)(); + void (*simple_event_marker)(const char* name, char phase, int num_args, + const char** arg_names, + const unsigned 
char* arg_types, + const unsigned long long* arg_values); +}; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" + +static void register_thread_noop(const char* aName, void* aGuessStackTop) { + /* no-op */ +} +static void unregister_thread_noop() { /* no-op */ +} +static void simple_event_marker_noop(const char* name, char phase, int num_args, + const char** arg_names, + const unsigned char* arg_types, + const unsigned long long* arg_values) { + /* no-op */ +} + +#pragma GCC diagnostic pop + +#if defined(_WIN32) +# define UPROFILER_OPENLIB() GetModuleHandle(NULL) +#else +# define UPROFILER_OPENLIB() dlopen(NULL, RTLD_NOW) +#endif + +#if defined(_WIN32) +# define UPROFILER_GET_SYM(handle, sym) GetProcAddress(handle, sym) +#else +# define UPROFILER_GET_SYM(handle, sym) dlsym(handle, sym) +#endif + +#if defined(_WIN32) +# define UPROFILER_PRINT_ERROR(func) fprintf(stderr, "%s error\n", #func); +#else +# define UPROFILER_PRINT_ERROR(func) \ + fprintf(stderr, "%s error: %s\n", #func, dlerror()); +#endif + +// Assumes that a variable of type UprofilerFuncPtrs, named uprofiler +// is accessible in the scope +#define UPROFILER_GET_FUNCTIONS() \ + void* handle = UPROFILER_OPENLIB(); \ + if (!handle) { \ + UPROFILER_PRINT_ERROR(UPROFILER_OPENLIB); \ + uprofiler.register_thread = register_thread_noop; \ + uprofiler.unregister_thread = unregister_thread_noop; \ + uprofiler.simple_event_marker = simple_event_marker_noop; \ + } \ + uprofiler.register_thread = \ + UPROFILER_GET_SYM(handle, "uprofiler_register_thread"); \ + if (!uprofiler.register_thread) { \ + UPROFILER_PRINT_ERROR(uprofiler_unregister_thread); \ + uprofiler.register_thread = register_thread_noop; \ + } \ + uprofiler.unregister_thread = \ + UPROFILER_GET_SYM(handle, "uprofiler_unregister_thread"); \ + if (!uprofiler.unregister_thread) { \ + UPROFILER_PRINT_ERROR(uprofiler_unregister_thread); \ + uprofiler.unregister_thread = unregister_thread_noop; \ + } \ + 
uprofiler.simple_event_marker = \ + UPROFILER_GET_SYM(handle, "uprofiler_simple_event_marker"); \ + if (!uprofiler.simple_event_marker) { \ + UPROFILER_PRINT_ERROR(uprofiler_simple_event_marker); \ + uprofiler.simple_event_marker = simple_event_marker_noop; \ + } + +#endif // MICRO_GECKO_PROFILER diff --git a/tools/profiler/public/ProfileAdditionalInformation.h b/tools/profiler/public/ProfileAdditionalInformation.h new file mode 100644 index 0000000000..c4cc8697b0 --- /dev/null +++ b/tools/profiler/public/ProfileAdditionalInformation.h @@ -0,0 +1,90 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// The Gecko Profiler is an always-on profiler that takes fast and low overhead +// samples of the program execution using only userspace functionality for +// portability. The goal of this module is to provide performance data in a +// generic cross-platform way without requiring custom tools or kernel support. +// +// Samples are collected to form a timeline with optional timeline event +// (markers) used for filtering. The samples include both native stacks and +// platform-independent "label stack" frames. + +#ifndef ProfileAdditionalInformation_h +#define ProfileAdditionalInformation_h + +#ifdef MOZ_GECKO_PROFILER +# include "shared-libraries.h" +#endif +#include "js/Value.h" +#include "nsString.h" + +namespace IPC { +class MessageReader; +class MessageWriter; +template +struct ParamTraits; +} // namespace IPC + +namespace mozilla { +// This structure contains additional information gathered while generating the +// profile json and iterating the buffer. 
+struct ProfileGenerationAdditionalInformation { +#ifdef MOZ_GECKO_PROFILER + ProfileGenerationAdditionalInformation() = default; + explicit ProfileGenerationAdditionalInformation( + const SharedLibraryInfo&& aSharedLibraries) + : mSharedLibraries(aSharedLibraries) {} + + size_t SizeOf() const { return mSharedLibraries.SizeOf(); } + + void Append(ProfileGenerationAdditionalInformation&& aOther) { + mSharedLibraries.AddAllSharedLibraries(aOther.mSharedLibraries); + } + + void FinishGathering() { mSharedLibraries.DeduplicateEntries(); } + + void ToJSValue(JSContext* aCx, JS::MutableHandle aRetVal) const; + + SharedLibraryInfo mSharedLibraries; +#endif // MOZ_GECKO_PROFILER +}; + +struct ProfileAndAdditionalInformation { + ProfileAndAdditionalInformation() = default; + explicit ProfileAndAdditionalInformation(const nsCString&& aProfile) + : mProfile(aProfile) {} + + ProfileAndAdditionalInformation( + const nsCString&& aProfile, + const ProfileGenerationAdditionalInformation&& aAdditionalInformation) + : mProfile(aProfile), + mAdditionalInformation(Some(aAdditionalInformation)) {} + + size_t SizeOf() const { + size_t size = mProfile.Length(); +#ifdef MOZ_GECKO_PROFILER + if (mAdditionalInformation.isSome()) { + size += mAdditionalInformation->SizeOf(); + } +#endif + return size; + } + + nsCString mProfile; + Maybe mAdditionalInformation; +}; +} // namespace mozilla + +namespace IPC { +template <> +struct ParamTraits { + typedef mozilla::ProfileGenerationAdditionalInformation paramType; + + static void Write(MessageWriter* aWriter, const paramType& aParam); + static bool Read(MessageReader* aReader, paramType* aResult); +}; +} // namespace IPC + +#endif // ProfileAdditionalInformation_h diff --git a/tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h b/tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h new file mode 100644 index 0000000000..1578bd2ddc --- /dev/null +++ 
b/tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h @@ -0,0 +1,160 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfileBufferEntrySerializationGeckoExtensions_h +#define ProfileBufferEntrySerializationGeckoExtensions_h + +#include "mozilla/ProfileBufferEntrySerialization.h" + +#include "js/AllocPolicy.h" +#include "js/Utility.h" +#include "nsString.h" + +namespace mozilla { + +// ---------------------------------------------------------------------------- +// ns[C]String + +// nsString or nsCString contents are serialized as the number of bytes (encoded +// as ULEB128) and all the characters in the string. The terminal '\0' is +// omitted. +// Make sure you write and read with the same character type! +// +// Usage: `nsCString s = ...; aEW.WriteObject(s);` +template +struct ProfileBufferEntryWriter::Serializer> { + static Length Bytes(const nsTString& aS) { + const auto length = aS.Length(); + return ProfileBufferEntryWriter::ULEB128Size(length) + + static_cast(length * sizeof(CHAR)); + } + + static void Write(ProfileBufferEntryWriter& aEW, const nsTString& aS) { + const auto length = aS.Length(); + aEW.WriteULEB128(length); + // Copy the bytes from the string's buffer. + aEW.WriteBytes(aS.Data(), length * sizeof(CHAR)); + } +}; + +template +struct ProfileBufferEntryReader::Deserializer> { + static void ReadInto(ProfileBufferEntryReader& aER, nsTString& aS) { + aS = Read(aER); + } + + static nsTString Read(ProfileBufferEntryReader& aER) { + const Length length = aER.ReadULEB128(); + nsTString s; + // BulkWrite is the most efficient way to copy bytes into the target string. 
+ auto writerOrErr = s.BulkWrite(length, 0, true); + MOZ_RELEASE_ASSERT(!writerOrErr.isErr()); + + auto writer = writerOrErr.unwrap(); + + aER.ReadBytes(writer.Elements(), length * sizeof(CHAR)); + writer.Finish(length, true); + return s; + } +}; + +// ---------------------------------------------------------------------------- +// nsAuto[C]String + +// nsAuto[C]String contents are serialized as the number of bytes (encoded as +// ULEB128) and all the characters in the string. The terminal '\0' is omitted. +// Make sure you write and read with the same character type! +// +// Usage: `nsAutoCString s = ...; aEW.WriteObject(s);` +template +struct ProfileBufferEntryWriter::Serializer> { + static Length Bytes(const nsTAutoStringN& aS) { + const auto length = aS.Length(); + return ProfileBufferEntryWriter::ULEB128Size(length) + + static_cast(length * sizeof(CHAR)); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const nsTAutoStringN& aS) { + const auto length = aS.Length(); + aEW.WriteULEB128(length); + // Copy the bytes from the string's buffer. + aEW.WriteBytes(aS.BeginReading(), length * sizeof(CHAR)); + } +}; + +template +struct ProfileBufferEntryReader::Deserializer> { + static void ReadInto(ProfileBufferEntryReader& aER, + nsTAutoStringN& aS) { + aS = Read(aER); + } + + static nsTAutoStringN Read(ProfileBufferEntryReader& aER) { + const auto length = aER.ReadULEB128(); + nsTAutoStringN s; + // BulkWrite is the most efficient way to copy bytes into the target string. + auto writerOrErr = s.BulkWrite(length, 0, true); + MOZ_RELEASE_ASSERT(!writerOrErr.isErr()); + + auto writer = writerOrErr.unwrap(); + aER.ReadBytes(writer.Elements(), length * sizeof(CHAR)); + writer.Finish(length, true); + return s; + } +}; + +// ---------------------------------------------------------------------------- +// JS::UniqueChars + +// JS::UniqueChars contents are serialized as the number of bytes (encoded as +// ULEB128) and all the characters in the string. 
The terminal '\0' is omitted. +// Note: A nullptr pointer will be serialized like an empty string, so when +// deserializing it will result in an allocated buffer only containing a +// single null terminator. +// +// Usage: `JS::UniqueChars s = ...; aEW.WriteObject(s);` +template <> +struct ProfileBufferEntryWriter::Serializer { + static Length Bytes(const JS::UniqueChars& aS) { + if (!aS) { + return ProfileBufferEntryWriter::ULEB128Size(0); + } + const auto len = static_cast(strlen(aS.get())); + return ProfileBufferEntryWriter::ULEB128Size(len) + len; + } + + static void Write(ProfileBufferEntryWriter& aEW, const JS::UniqueChars& aS) { + if (!aS) { + aEW.WriteULEB128(0); + return; + } + const auto len = static_cast(strlen(aS.get())); + aEW.WriteULEB128(len); + aEW.WriteBytes(aS.get(), len); + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer { + static void ReadInto(ProfileBufferEntryReader& aER, JS::UniqueChars& aS) { + aS = Read(aER); + } + + static JS::UniqueChars Read(ProfileBufferEntryReader& aER) { + const auto len = aER.ReadULEB128(); + // Use the same allocation policy as JS_smprintf. + char* buffer = + static_cast(js::SystemAllocPolicy{}.pod_malloc(len + 1)); + aER.ReadBytes(buffer, len); + buffer[len] = '\0'; + return JS::UniqueChars(buffer); + } +}; + +} // namespace mozilla + +#endif // ProfileBufferEntrySerializationGeckoExtensions_h diff --git a/tools/profiler/public/ProfileJSONWriter.h b/tools/profiler/public/ProfileJSONWriter.h new file mode 100644 index 0000000000..8d23d7a890 --- /dev/null +++ b/tools/profiler/public/ProfileJSONWriter.h @@ -0,0 +1,19 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef PROFILEJSONWRITER_H +#define PROFILEJSONWRITER_H + +#include "mozilla/BaseProfileJSONWriter.h" + +using ChunkedJSONWriteFunc = mozilla::baseprofiler::ChunkedJSONWriteFunc; +using JSONSchemaWriter = mozilla::baseprofiler::JSONSchemaWriter; +using OStreamJSONWriteFunc = mozilla::baseprofiler::OStreamJSONWriteFunc; +using SpliceableChunkedJSONWriter = + mozilla::baseprofiler::SpliceableChunkedJSONWriter; +using SpliceableJSONWriter = mozilla::baseprofiler::SpliceableJSONWriter; +using UniqueJSONStrings = mozilla::baseprofiler::UniqueJSONStrings; + +#endif // PROFILEJSONWRITER_H diff --git a/tools/profiler/public/ProfilerBindings.h b/tools/profiler/public/ProfilerBindings.h new file mode 100644 index 0000000000..096a860130 --- /dev/null +++ b/tools/profiler/public/ProfilerBindings.h @@ -0,0 +1,162 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* FFI functions for Profiler Rust API to call into profiler */ + +#ifndef ProfilerBindings_h +#define ProfilerBindings_h + +#include "mozilla/BaseProfilerMarkersPrerequisites.h" + +#include +#include + +namespace mozilla { +class AutoProfilerLabel; +class MarkerSchema; +class MarkerTiming; +class TimeStamp; +enum class StackCaptureOptions; + +namespace baseprofiler { +enum class ProfilingCategoryPair : uint32_t; +class SpliceableJSONWriter; +} // namespace baseprofiler + +} // namespace mozilla + +namespace JS { +enum class ProfilingCategoryPair : uint32_t; +} // namespace JS + +// Everything in here is safe to include unconditionally, implementations must +// take !MOZ_GECKO_PROFILER into account. 
+extern "C" { + +void gecko_profiler_register_thread(const char* aName); +void gecko_profiler_unregister_thread(); + +void gecko_profiler_construct_label(mozilla::AutoProfilerLabel* aAutoLabel, + JS::ProfilingCategoryPair aCategoryPair); +void gecko_profiler_destruct_label(mozilla::AutoProfilerLabel* aAutoLabel); + +// Construct, clone and destruct the timestamp for profiler time. +void gecko_profiler_construct_timestamp_now(mozilla::TimeStamp* aTimeStamp); +void gecko_profiler_clone_timestamp(const mozilla::TimeStamp* aSrcTimeStamp, + mozilla::TimeStamp* aDestTimeStamp); +void gecko_profiler_destruct_timestamp(mozilla::TimeStamp* aTimeStamp); + +// Addition and subtraction for timestamp. +void gecko_profiler_add_timestamp(const mozilla::TimeStamp* aTimeStamp, + mozilla::TimeStamp* aDestTimeStamp, + double aMicroseconds); +void gecko_profiler_subtract_timestamp(const mozilla::TimeStamp* aTimeStamp, + mozilla::TimeStamp* aDestTimeStamp, + double aMicroseconds); + +// Various MarkerTiming constructors and a destructor. +void gecko_profiler_construct_marker_timing_instant_at( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime); +void gecko_profiler_construct_marker_timing_instant_now( + mozilla::MarkerTiming* aMarkerTiming); +void gecko_profiler_construct_marker_timing_interval( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime, + const mozilla::TimeStamp* aEndTime); +void gecko_profiler_construct_marker_timing_interval_until_now_from( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime); +void gecko_profiler_construct_marker_timing_interval_start( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime); +void gecko_profiler_construct_marker_timing_interval_end( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime); +void gecko_profiler_destruct_marker_timing( + mozilla::MarkerTiming* aMarkerTiming); + +// MarkerSchema constructors and destructor. 
+void gecko_profiler_construct_marker_schema( + mozilla::MarkerSchema* aMarkerSchema, + const mozilla::MarkerSchema::Location* aLocations, size_t aLength); +void gecko_profiler_construct_marker_schema_with_special_front_end_location( + mozilla::MarkerSchema* aMarkerSchema); +void gecko_profiler_destruct_marker_schema( + mozilla::MarkerSchema* aMarkerSchema); + +// MarkerSchema methods for adding labels. +void gecko_profiler_marker_schema_set_chart_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength); +void gecko_profiler_marker_schema_set_tooltip_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength); +void gecko_profiler_marker_schema_set_table_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength); +void gecko_profiler_marker_schema_set_all_labels(mozilla::MarkerSchema* aSchema, + const char* aLabel, + size_t aLabelLength); + +// MarkerSchema methods for adding key/key-label values. +void gecko_profiler_marker_schema_add_key_format( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + mozilla::MarkerSchema::Format aFormat); +void gecko_profiler_marker_schema_add_key_label_format( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + const char* aLabel, size_t aLabelLength, + mozilla::MarkerSchema::Format aFormat); +void gecko_profiler_marker_schema_add_key_format_searchable( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + mozilla::MarkerSchema::Format aFormat, + mozilla::MarkerSchema::Searchable aSearchable); +void gecko_profiler_marker_schema_add_key_label_format_searchable( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + const char* aLabel, size_t aLabelLength, + mozilla::MarkerSchema::Format aFormat, + mozilla::MarkerSchema::Searchable aSearchable); +void gecko_profiler_marker_schema_add_static_label_value( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength, + const 
char* aValue, size_t aValueLength); + +// Stream MarkerSchema to SpliceableJSONWriter. +void gecko_profiler_marker_schema_stream( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, mozilla::MarkerSchema* aMarkerSchema, + void* aStreamedNamesSet); + +// Various SpliceableJSONWriter methods to add properties. +void gecko_profiler_json_writer_int_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, int64_t aValue); +void gecko_profiler_json_writer_float_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, double aValue); +void gecko_profiler_json_writer_bool_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, bool aValue); +void gecko_profiler_json_writer_string_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, const char* aValue, size_t aValueLength); +void gecko_profiler_json_writer_null_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength); + +// Marker APIs. 
+void gecko_profiler_add_marker_untyped( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions); +void gecko_profiler_add_marker_text( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions, const char* aText, + size_t aTextLength); +void gecko_profiler_add_marker( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions, uint8_t aMarkerTag, + const uint8_t* aPayload, size_t aPayloadSize); + +} // extern "C" + +#endif // ProfilerBindings_h diff --git a/tools/profiler/public/ProfilerChild.h b/tools/profiler/public/ProfilerChild.h new file mode 100644 index 0000000000..a781784aae --- /dev/null +++ b/tools/profiler/public/ProfilerChild.h @@ -0,0 +1,106 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerChild_h +#define ProfilerChild_h + +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/DataMutex.h" +#include "mozilla/PProfilerChild.h" +#include "mozilla/ProfileBufferControlledChunkManager.h" +#include "mozilla/ProgressLogger.h" +#include "mozilla/RefPtr.h" +#include "ProfileAdditionalInformation.h" + +class nsIThread; +struct PRThread; + +namespace mozilla { + +// The ProfilerChild actor is created in all processes except for the main +// process. 
The corresponding ProfilerParent actor is created in the main +// process, and it will notify us about profiler state changes and request +// profiles from us. +class ProfilerChild final : public PProfilerChild, + public mozilla::ipc::IShmemAllocator { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ProfilerChild, final) + + ProfilerChild(); + + // Collects and returns a profile. + // This method can be used to grab a profile just before PProfiler is torn + // down. The collected profile should then be sent through a different + // message channel that is guaranteed to stay open long enough. + ProfileAndAdditionalInformation GrabShutdownProfile(); + + void Destroy(); + + // This should be called regularly from outside of the profiler lock. + static void ProcessPendingUpdate(); + + static bool IsLockedOnCurrentThread(); + + private: + virtual ~ProfilerChild(); + + mozilla::ipc::IPCResult RecvStart(const ProfilerInitParams& params, + StartResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvEnsureStarted( + const ProfilerInitParams& params, + EnsureStartedResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvStop(StopResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvPause(PauseResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvResume(ResumeResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvPauseSampling( + PauseSamplingResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvResumeSampling( + ResumeSamplingResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvWaitOnePeriodicSampling( + WaitOnePeriodicSamplingResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvAwaitNextChunkManagerUpdate( + AwaitNextChunkManagerUpdateResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvDestroyReleasedChunksAtOrBefore( + const TimeStamp& aTimeStamp) override; + mozilla::ipc::IPCResult RecvGatherProfile( + GatherProfileResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvGetGatherProfileProgress( + 
GetGatherProfileProgressResolver&& aResolve) override; + mozilla::ipc::IPCResult RecvClearAllPages() override; + + void ActorDestroy(ActorDestroyReason aActorDestroyReason) override; + + FORWARD_SHMEM_ALLOCATOR_TO(PProfilerChild) + + void SetupChunkManager(); + void ResetChunkManager(); + void ResolveChunkUpdate( + PProfilerChild::AwaitNextChunkManagerUpdateResolver& aResolve); + void ProcessChunkManagerUpdate( + ProfileBufferControlledChunkManager::Update&& aUpdate); + + static void GatherProfileThreadFunction(void* already_AddRefedParameters); + + nsCOMPtr mThread; + bool mDestroyed; + + ProfileBufferControlledChunkManager* mChunkManager = nullptr; + AwaitNextChunkManagerUpdateResolver mAwaitNextChunkManagerUpdateResolver; + ProfileBufferControlledChunkManager::Update mChunkManagerUpdate; + + struct ProfilerChildAndUpdate { + RefPtr mProfilerChild; + ProfileBufferControlledChunkManager::Update mUpdate; + }; + static DataMutexBase + sPendingChunkManagerUpdate; + + RefPtr mGatherProfileProgress; +}; + +} // namespace mozilla + +#endif // ProfilerChild_h diff --git a/tools/profiler/public/ProfilerCodeAddressService.h b/tools/profiler/public/ProfilerCodeAddressService.h new file mode 100644 index 0000000000..9d75c363b3 --- /dev/null +++ b/tools/profiler/public/ProfilerCodeAddressService.h @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerCodeAddressService_h +#define ProfilerCodeAddressService_h + +#include "CodeAddressService.h" +#include "nsTArray.h" + +namespace mozilla { + +// This SymbolTable struct, and the CompactSymbolTable struct in the +// profiler rust module, have the exact same memory layout. 
+// nsTArray and ThinVec are FFI-compatible, because the thin-vec crate is +// being compiled with the "gecko-ffi" feature enabled. +struct SymbolTable { + SymbolTable() = default; + SymbolTable(SymbolTable&& aOther) = default; + + nsTArray mAddrs; + nsTArray mIndex; + nsTArray mBuffer; +}; + +} // namespace mozilla + +/** + * Cache and look up function symbol names. + * + * We don't template this on AllocPolicy since we need to use nsTArray in + * SymbolTable above, which doesn't work with AllocPolicy. (We can't switch + * to Vector, as we would lose FFI compatibility with ThinVec.) + */ +class ProfilerCodeAddressService : public mozilla::CodeAddressService<> { + public: + // Like GetLocation, but only returns the symbol name. + bool GetFunction(const void* aPc, nsACString& aResult); + + private: +#if defined(XP_LINUX) || defined(XP_FREEBSD) + // Map of library names (owned by mLibraryStrings) to SymbolTables filled + // in by profiler_get_symbol_table. + mozilla::HashMap, AllocPolicy> + mSymbolTables; +#endif +}; + +#endif // ProfilerCodeAddressService_h diff --git a/tools/profiler/public/ProfilerControl.h b/tools/profiler/public/ProfilerControl.h new file mode 100644 index 0000000000..466d15eb69 --- /dev/null +++ b/tools/profiler/public/ProfilerControl.h @@ -0,0 +1,190 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// APIs that control the lifetime of the profiler: Initialization, start, pause, +// resume, stop, and shutdown. + +#ifndef ProfilerControl_h +#define ProfilerControl_h + +#include "mozilla/BaseProfilerRAIIMacro.h" + +// Everything in here is also safe to include unconditionally, and only defines +// empty macros if MOZ_GECKO_PROFILER is unset. 
+// If your file only uses particular APIs (e.g., only markers), please consider +// including only the needed headers instead of this one, to reduce compilation +// dependencies. + +enum class IsFastShutdown { + No, + Yes, +}; + +#ifndef MOZ_GECKO_PROFILER + +// This file can be #included unconditionally. However, everything within this +// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the +// following macros and functions, which encapsulate the most common operations +// and thus avoid the need for many #ifdefs. + +# define AUTO_PROFILER_INIT ::profiler_init_main_thread_id() +# define AUTO_PROFILER_INIT2 + +// Function stubs for when MOZ_GECKO_PROFILER is not defined. + +static inline void profiler_init(void* stackTop) {} + +static inline void profiler_shutdown( + IsFastShutdown aIsFastShutdown = IsFastShutdown::No) {} + +#else // !MOZ_GECKO_PROFILER + +# include "BaseProfiler.h" +# include "mozilla/Attributes.h" +# include "mozilla/Maybe.h" +# include "mozilla/MozPromise.h" +# include "mozilla/PowerOfTwo.h" +# include "mozilla/Vector.h" + +//--------------------------------------------------------------------------- +// Start and stop the profiler +//--------------------------------------------------------------------------- + +static constexpr mozilla::PowerOfTwo32 PROFILER_DEFAULT_ENTRIES = + mozilla::baseprofiler::BASE_PROFILER_DEFAULT_ENTRIES; + +static constexpr mozilla::PowerOfTwo32 PROFILER_DEFAULT_STARTUP_ENTRIES = + mozilla::baseprofiler::BASE_PROFILER_DEFAULT_STARTUP_ENTRIES; + +# define PROFILER_DEFAULT_INTERVAL BASE_PROFILER_DEFAULT_INTERVAL +# define PROFILER_MAX_INTERVAL BASE_PROFILER_MAX_INTERVAL + +# define PROFILER_DEFAULT_ACTIVE_TAB_ID 0 + +// Initialize the profiler. If MOZ_PROFILER_STARTUP is set the profiler will +// also be started. This call must happen before any other profiler calls +// (except profiler_start(), which will call profiler_init() if it hasn't +// already run). 
+void profiler_init(void* stackTop); +void profiler_init_threadmanager(); + +// Call this as early as possible +# define AUTO_PROFILER_INIT mozilla::AutoProfilerInit PROFILER_RAII +// Call this after the nsThreadManager is Init()ed +# define AUTO_PROFILER_INIT2 mozilla::AutoProfilerInit2 PROFILER_RAII + +// Clean up the profiler module, stopping it if required. This function may +// also save a shutdown profile if requested. No profiler calls should happen +// after this point and all profiling stack labels should have been popped. +void profiler_shutdown(IsFastShutdown aIsFastShutdown = IsFastShutdown::No); + +// Start the profiler -- initializing it first if necessary -- with the +// selected options. Stops and restarts the profiler if it is already active. +// After starting the profiler is "active". The samples will be recorded in a +// circular buffer. +// "aCapacity" is the maximum number of 8-byte entries in the profiler's +// circular buffer. +// "aInterval" is the sampling interval, measured in milliseconds. +// "aFeatures" is the feature set. Features unsupported by this +// platform/configuration are ignored. +// "aFilters" is the list of thread filters. Threads that do not match any +// of the filters are not profiled. A filter matches a thread if +// (a) the thread name contains the filter as a case-insensitive +// substring, or +// (b) the filter is of the form "pid:<n>" where n is the process +// id of the process that the thread is running in. +// "aActiveTabID" is the BrowserId of the active browser screen's active tab. +// It's being used to determine the profiled tab. It's "0" if +// we failed to get the ID. +// "aDuration" is the duration of entries in the profiler's circular buffer. +// Returns as soon as this process' profiler has started, the returned promise +// gets resolved when profilers in sub-processes (if any) have started.
+RefPtr profiler_start( + mozilla::PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, + const mozilla::Maybe& aDuration = mozilla::Nothing()); + +// Stop the profiler and discard the profile without saving it. A no-op if the +// profiler is inactive. After stopping the profiler is "inactive". +// Returns as soon as this process' profiler has stopped, the returned promise +// gets resolved when profilers in sub-processes (if any) have stopped. +RefPtr profiler_stop(); + +// If the profiler is inactive, start it. If it's already active, restart it if +// the requested settings differ from the current settings. Both the check and +// the state change are performed while the profiler state is locked. +// The only difference to profiler_start is that the current buffer contents are +// not discarded if the profiler is already running with the requested settings. +void profiler_ensure_started( + mozilla::PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, + const mozilla::Maybe& aDuration = mozilla::Nothing()); + +//--------------------------------------------------------------------------- +// Control the profiler +//--------------------------------------------------------------------------- + +// Pause and resume the profiler. No-ops if the profiler is inactive. While +// paused the profile will not take any samples and will not record any data +// into its buffers. The profiler remains fully initialized in this state. +// Timeline markers will still be stored. This feature will keep JavaScript +// profiling enabled, thus allowing toggling the profiler without invalidating +// the JIT. +// Returns as soon as this process' profiler has paused/resumed, the returned +// promise gets resolved when profilers in sub-processes (if any) have +// paused/resumed. 
+RefPtr profiler_pause(); +RefPtr profiler_resume(); + +// Only pause and resume the periodic sampling loop, including stack sampling, +// counters, and profiling overheads. +// Returns as soon as this process' profiler has paused/resumed sampling, the +// returned promise gets resolved when profilers in sub-processes (if any) have +// paused/resumed sampling. +RefPtr profiler_pause_sampling(); +RefPtr profiler_resume_sampling(); + +//--------------------------------------------------------------------------- +// Get information from the profiler +//--------------------------------------------------------------------------- + +// Get the params used to start the profiler. Returns 0 and an empty vector +// (via outparams) if the profile is inactive. It's possible that the features +// returned may be slightly different to those requested due to required +// adjustments. +void profiler_get_start_params( + int* aEntrySize, mozilla::Maybe* aDuration, double* aInterval, + uint32_t* aFeatures, + mozilla::Vector* aFilters, + uint64_t* aActiveTabID); + +//--------------------------------------------------------------------------- +// RAII classes +//--------------------------------------------------------------------------- + +namespace mozilla { + +class MOZ_RAII AutoProfilerInit { + public: + explicit AutoProfilerInit() { profiler_init(this); } + + ~AutoProfilerInit() { profiler_shutdown(); } + + private: +}; + +class MOZ_RAII AutoProfilerInit2 { + public: + explicit AutoProfilerInit2() { profiler_init_threadmanager(); } + + private: +}; + +} // namespace mozilla + +#endif // !MOZ_GECKO_PROFILER + +#endif // ProfilerControl_h diff --git a/tools/profiler/public/ProfilerCounts.h b/tools/profiler/public/ProfilerCounts.h new file mode 100644 index 0000000000..86f6cbfe4f --- /dev/null +++ b/tools/profiler/public/ProfilerCounts.h @@ -0,0 +1,296 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This 
Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerCounts_h +#define ProfilerCounts_h + +#ifndef MOZ_GECKO_PROFILER + +// No-op stubs. Their names and parameter lists mirror the real macros in the +// #else branch so that callers compile in both configurations. +// AUTO_PROFILER_TOTAL has no real counterpart; it is kept only so that any +// existing user of that stub name still compiles. +# define PROFILER_DEFINE_COUNT_TOTAL(label, category, description) +# define PROFILER_DEFINE_COUNT(label, category, description) +# define PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description) +# define AUTO_PROFILER_COUNT_TOTAL(label, count) +# define AUTO_PROFILER_TOTAL(label, count) +# define AUTO_PROFILER_COUNT(label, count) +# define AUTO_PROFILER_STATIC_COUNT(label, count) +# define AUTO_PROFILER_FORCE_ALLOCATION(label) + +#else + +# include "mozilla/Atomics.h" + +class BaseProfilerCount; +void profiler_add_sampled_counter(BaseProfilerCount* aCounter); +void profiler_remove_sampled_counter(BaseProfilerCount* aCounter); + +typedef mozilla::Atomic + ProfilerAtomicSigned; +typedef mozilla::Atomic + ProfilerAtomicUnsigned; + +// Counter support +// There are two types of counters: +// 1) a simple counter which can be added to or subtracted from. This could +// track the number of objects of a type, the number of calls to something +// (reflow, JIT, etc). +// 2) a combined counter which has the above, plus a number-of-calls counter +// that is incremented by 1 for each call to modify the count. This provides +// an optional source for a 'heatmap' of access. This can be used (for +// example) to track the amount of memory allocated, and provide a heatmap of +// memory operations (allocs/frees). +// +// Counters are sampled by the profiler once per sample-period. At this time, +// all counters are global to the process. In the future, there might be more +// versions with per-thread or other discriminators. +// +// Typical usage: +// There are two ways to use counters: With heap-created counter objects, +// or using macros. Note: the macros use statics, and will be slightly +// faster/smaller, and you need to care about creating them before using +// them.
They're similar to the use-pattern for the other AUTO_PROFILER* +// macros, but they do need the PROFILER_DEFINE* to be used to instantiate +// the statics. +// +// PROFILER_DEFINE_COUNT(mything, "JIT", "Some JIT byte count") +// ... +// void foo() { ... AUTO_PROFILER_COUNT(mything, number_of_bytes_used); ... } +// +// or (to also get a heatmap) +// +// PROFILER_DEFINE_COUNT_TOTAL(mything, "JIT", "Some JIT byte count") +// ... +// void foo() { +// ... +// AUTO_PROFILER_COUNT_TOTAL(mything, number_of_bytes_generated); +// ... +// } +// +// To use without statics/macros: +// +// UniquePtr<ProfilerCounter> myCounter; +// ... +// myCounter = +// MakeUnique<ProfilerCounter>("mything", "JIT", "Some JIT byte count"); +// ... +// void foo() { ... myCounter->Add(number_of_bytes_generated); ... } + +// Describes one counter the profiler can sample: holds its label, category and +// description strings plus pointers to the atomic count and (optionally) the +// atomic number-of-calls, both of which Sample() reads. +class BaseProfilerCount { + public: + BaseProfilerCount(const char* aLabel, ProfilerAtomicSigned* aCounter, + ProfilerAtomicUnsigned* aNumber, const char* aCategory, + const char* aDescription) + : mLabel(aLabel), + mCategory(aCategory), + mDescription(aDescription), + mCounter(aCounter), + mNumber(aNumber) { +# define COUNTER_CANARY 0xDEADBEEF +# ifdef DEBUG + mCanary = COUNTER_CANARY; + mPrevNumber = 0; +# endif + // Can't call profiler_* here since this may be non-xul-library + } + + virtual ~BaseProfilerCount() { +# ifdef DEBUG + mCanary = 0; +# endif + } + + struct CountSample { + int64_t count; + uint64_t number; + // This field indicates if the sample has already been consumed by a call + // to the Sample() method. This allows the profiler to discard duplicate + // samples if the counter sampling rate is lower than the profiler sampling + // rate. This can happen for example with some power meters that sample up + // to every 10ms. + // It should always be true when calling Sample() for the first time. + bool isSampleNew; + }; + virtual CountSample Sample() { + MOZ_ASSERT(mCanary == COUNTER_CANARY); + + CountSample result; + result.count = *mCounter; + result.number = mNumber ?
*mNumber : 0; +# ifdef DEBUG + MOZ_ASSERT(result.number >= mPrevNumber); + mPrevNumber = result.number; +# endif + result.isSampleNew = true; + return result; + } + + void Clear() { + *mCounter = 0; + // We don't reset *mNumber or mPrevNumber. We encode numbers as + // positive deltas, and currently we only care about the deltas (for + // e.g. heatmaps). If we ever need to clear mNumber as well, we can add an + // alternative method (Reset()) to do so. + } + + // We don't define ++ and Add() here, since the static defines directly + // increment the atomic counters, and the subclasses implement ++ and + // Add() directly. + + // These typically are static strings (for example if you use the macros + // below) + const char* mLabel; + const char* mCategory; + const char* mDescription; + // We're ok with these being un-ordered in race conditions. These are + // pointers because we want to be able to use statics and increment them + // directly. Otherwise we could just have them inline, and not need the + // constructor args. + // These can be static globals (using the macros below), though they + // don't have to be - their lifetime must be longer than the use of them + // by the profiler (see profiler_add/remove_sampled_counter()). If you're + // using a lot of these, they probably should be allocated at runtime (see + // class ProfilerCounter below).
+ ProfilerAtomicSigned* mCounter; + ProfilerAtomicUnsigned* mNumber; // may be null + +# ifdef DEBUG + uint32_t mCanary; + uint64_t mPrevNumber; // value of number from the last Sample() +# endif +}; + +// Designed to be allocated dynamically, and simply incremented with obj++ +// or obj->Add(n) +class ProfilerCounter final : public BaseProfilerCount { + public: + ProfilerCounter(const char* aLabel, const char* aCategory, + const char* aDescription) + : BaseProfilerCount(aLabel, &mCounter, nullptr, aCategory, aDescription) { + // Assume we're in libxul + profiler_add_sampled_counter(this); + } + + virtual ~ProfilerCounter() { profiler_remove_sampled_counter(this); } + + BaseProfilerCount& operator++() { + Add(1); + return *this; + } + + void Add(int64_t aNumber) { mCounter += aNumber; } + + ProfilerAtomicSigned mCounter; +}; + +// Also keeps a heatmap (number of calls to ++/Add()) +class ProfilerCounterTotal final : public BaseProfilerCount { + public: + ProfilerCounterTotal(const char* aLabel, const char* aCategory, + const char* aDescription) + : BaseProfilerCount(aLabel, &mCounter, &mNumber, aCategory, + aDescription) { + // Assume we're in libxul + profiler_add_sampled_counter(this); + } + + virtual ~ProfilerCounterTotal() { profiler_remove_sampled_counter(this); } + + BaseProfilerCount& operator++() { + Add(1); + return *this; + } + + void Add(int64_t aNumber) { + mCounter += aNumber; + mNumber++; + } + + ProfilerAtomicSigned mCounter; + ProfilerAtomicUnsigned mNumber; +}; + +// Defines a counter that is sampled on each profiler tick, with a running +// count (signed), and number-of-instances. Note that because these are two +// independent Atomics, there is a possibility that count will not include +// the last call, but number of uses will.
I think this is not worth +// worrying about +# define PROFILER_DEFINE_COUNT_TOTAL(label, category, description) \ + ProfilerAtomicSigned profiler_count_##label(0); \ + ProfilerAtomicUnsigned profiler_number_##label(0); \ + const char profiler_category_##label[] = category; \ + const char profiler_description_##label[] = description; \ + mozilla::UniquePtr AutoCount_##label; + +// This counts, but doesn't keep track of the number of calls to +// AUTO_PROFILER_COUNT() +# define PROFILER_DEFINE_COUNT(label, category, description) \ + ProfilerAtomicSigned profiler_count_##label(0); \ + const char profiler_category_##label[] = category; \ + const char profiler_description_##label[] = description; \ + mozilla::UniquePtr AutoCount_##label; + +// This will create a static initializer if used, but avoids a possible +// allocation. +# define PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description) \ + ProfilerAtomicSigned profiler_count_##label(0); \ + ProfilerAtomicUnsigned profiler_number_##label(0); \ + BaseProfilerCount AutoCount_##label(#label, &profiler_count_##label, \ + &profiler_number_##label, category, \ + description); + +// If we didn't care about static initializers, we could avoid the need for +// a ptr to the BaseProfilerCount object. + +// XXX It would be better to do this without the if() and without the +// theoretical race to set the UniquePtr (i.e. possible leak). 
+# define AUTO_PROFILER_COUNT_TOTAL(label, count) \ + do { \ + profiler_number_##label++; /* do this first*/ \ + profiler_count_##label += count; \ + if (!AutoCount_##label) { \ + /* Ignore that we could call this twice in theory, and that we leak \ + * them \ + */ \ + AutoCount_##label.reset(new BaseProfilerCount( \ + #label, &profiler_count_##label, &profiler_number_##label, \ + profiler_category_##label, profiler_description_##label)); \ + profiler_add_sampled_counter(AutoCount_##label.get()); \ + } \ + } while (0) + +// Count-only variant for PROFILER_DEFINE_COUNT, which declares no +// profiler_number_##label: the counter pointer must be the count atomic and +// the call-number pointer is null. +# define AUTO_PROFILER_COUNT(label, count) \ + do { \ + profiler_count_##label += count; /* do this first*/ \ + if (!AutoCount_##label) { \ + /* Ignore that we could call this twice in theory, and that we leak \ + * them \ + */ \ + AutoCount_##label.reset(new BaseProfilerCount( \ + #label, &profiler_count_##label, nullptr, \ + profiler_category_##label, profiler_description_##label)); \ + profiler_add_sampled_counter(AutoCount_##label.get()); \ + } \ + } while (0) + +# define AUTO_PROFILER_STATIC_COUNT(label, count) \ + do { \ + profiler_number_##label++; /* do this first*/ \ + profiler_count_##label += count; \ + } while (0) + +// if we need to force the allocation +# define AUTO_PROFILER_FORCE_ALLOCATION(label) \ + do { \ + if (!AutoCount_##label) { \ + /* Ignore that we could call this twice in theory, and that we leak \ + * them \ + */ \ + AutoCount_##label.reset(new BaseProfilerCount( \ + #label, &profiler_count_##label, &profiler_number_##label, \ + profiler_category_##label, profiler_description_##label)); \ + } \ + } while (0) + +#endif // !MOZ_GECKO_PROFILER + +#endif // ProfilerCounts_h diff --git a/tools/profiler/public/ProfilerLabels.h b/tools/profiler/public/ProfilerLabels.h new file mode 100644 index 0000000000..f05e357451 --- /dev/null +++ b/tools/profiler/public/ProfilerLabels.h @@ -0,0 +1,268 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This
Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This header contains all definitions related to profiler labels. +// It is safe to include unconditionally, and only defines empty macros if +// MOZ_GECKO_PROFILER is not set. + +#ifndef ProfilerLabels_h +#define ProfilerLabels_h + +#include "mozilla/ProfilerThreadState.h" + +#include "js/ProfilingCategory.h" +#include "js/ProfilingStack.h" +#include "js/RootingAPI.h" +#include "mozilla/Assertions.h" +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/BaseProfilerRAIIMacro.h" +#include "mozilla/Maybe.h" +#include "mozilla/ProfilerThreadRegistration.h" +#include "mozilla/ThreadLocal.h" +#include "nsString.h" + +#include + +struct JSContext; + +// Insert an RAII object in this scope to enter a label stack frame. Any +// samples collected in this scope will contain this label in their stack. +// The label argument must be a static C string. It is usually of the +// form "ClassName::FunctionName". (Ideally we'd use the compiler to provide +// that for us, but __func__ gives us the function name without the class +// name.) If the label applies to only part of a function, you can qualify it +// like this: "ClassName::FunctionName:PartName". +// +// Use AUTO_PROFILER_LABEL_DYNAMIC_* if you want to add additional / dynamic +// information to the label stack frame. +#define AUTO_PROFILER_LABEL(label, categoryPair) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + label, nullptr, JS::ProfilingCategoryPair::categoryPair) + +// Similar to AUTO_PROFILER_LABEL, but that adds the RELEVANT_FOR_JS flag. 
+#define AUTO_PROFILER_LABEL_RELEVANT_FOR_JS(label, categoryPair) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + label, nullptr, JS::ProfilingCategoryPair::categoryPair, \ + uint32_t(js::ProfilingStackFrame::Flags::RELEVANT_FOR_JS)) + +// Similar to AUTO_PROFILER_LABEL, but with only one argument: the category +// pair. The label string is taken from the category pair. This is convenient +// for labels like AUTO_PROFILER_LABEL_CATEGORY_PAIR(GRAPHICS_LayerBuilding) +// which would otherwise just repeat the string. +#define AUTO_PROFILER_LABEL_CATEGORY_PAIR(categoryPair) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + "", nullptr, JS::ProfilingCategoryPair::categoryPair, \ + uint32_t( \ + js::ProfilingStackFrame::Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) + +// Similar to AUTO_PROFILER_LABEL_CATEGORY_PAIR but adding the RELEVANT_FOR_JS +// flag. +#define AUTO_PROFILER_LABEL_CATEGORY_PAIR_RELEVANT_FOR_JS(categoryPair) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + "", nullptr, JS::ProfilingCategoryPair::categoryPair, \ + uint32_t( \ + js::ProfilingStackFrame::Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR) | \ + uint32_t(js::ProfilingStackFrame::Flags::RELEVANT_FOR_JS)) + +// Similar to AUTO_PROFILER_LABEL, but with an additional string. The inserted +// RAII object stores the cStr pointer in a field; it does not copy the string. +// +// WARNING: This means that the string you pass to this macro needs to live at +// least until the end of the current scope. Be careful using this macro with +// ns[C]String; the other AUTO_PROFILER_LABEL_DYNAMIC_* macros below are +// preferred because they avoid this problem. +// +// If the profiler samples the current thread and walks the label stack while +// this RAII object is on the stack, it will copy the supplied string into the +// profile buffer. So there's one string copy operation, and it happens at +// sample time. 
+// +// Compare this to the plain AUTO_PROFILER_LABEL macro, which only accepts +// literal strings: When the label stack frames generated by +// AUTO_PROFILER_LABEL are sampled, no string copy needs to be made because the +// profile buffer can just store the raw pointers to the literal strings. +// Consequently, AUTO_PROFILER_LABEL frames take up considerably less space in +// the profile buffer than AUTO_PROFILER_LABEL_DYNAMIC_* frames. +#define AUTO_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + label, cStr, JS::ProfilingCategoryPair::categoryPair) + +// Like AUTO_PROFILER_LABEL_DYNAMIC_CSTR, but with the NONSENSITIVE flag to +// note that it does not contain sensitive information (so we can include it +// in, for example, the BackgroundHangMonitor) +#define AUTO_PROFILER_LABEL_DYNAMIC_CSTR_NONSENSITIVE(label, categoryPair, \ + cStr) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + label, cStr, JS::ProfilingCategoryPair::categoryPair, \ + uint32_t(js::ProfilingStackFrame::Flags::NONSENSITIVE)) + +// Similar to AUTO_PROFILER_LABEL_DYNAMIC_CSTR, but takes an nsACString. +// +// Note: The use of the Maybe<>s ensures the scopes for the dynamic string and +// the AutoProfilerLabel are appropriate, while also not incurring the runtime +// cost of the string assignment unless the profiler is active. Therefore, +// unlike AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC_CSTR, this macro +// doesn't push/pop a label when the profiler is inactive. 
+#define AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING(label, categoryPair, nsCStr) \ + mozilla::Maybe autoCStr; \ + mozilla::Maybe raiiObjectNsCString; \ + if (profiler_is_active()) { \ + autoCStr.emplace(nsCStr); \ + raiiObjectNsCString.emplace(label, autoCStr->get(), \ + JS::ProfilingCategoryPair::categoryPair); \ + } + +#define AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING_RELEVANT_FOR_JS( \ + label, categoryPair, nsCStr) \ + mozilla::Maybe autoCStr; \ + mozilla::Maybe raiiObjectNsCString; \ + if (profiler_is_active()) { \ + autoCStr.emplace(nsCStr); \ + raiiObjectNsCString.emplace( \ + label, autoCStr->get(), JS::ProfilingCategoryPair::categoryPair, \ + uint32_t(js::ProfilingStackFrame::Flags::RELEVANT_FOR_JS)); \ + } + +// Match the conditions for MOZ_ENABLE_BACKGROUND_HANG_MONITOR +#if defined(NIGHTLY_BUILD) && !defined(MOZ_DEBUG) && !defined(MOZ_TSAN) && \ + !defined(MOZ_ASAN) +# define SHOULD_CREATE_ALL_NONSENSITIVE_LABEL_FRAMES true +#else +# define SHOULD_CREATE_ALL_NONSENSITIVE_LABEL_FRAMES profiler_is_active() +#endif + +// See note above AUTO_PROFILER_LABEL_DYNAMIC_CSTR_NONSENSITIVE +#define AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING_NONSENSITIVE( \ + label, categoryPair, nsCStr) \ + mozilla::Maybe autoCStr; \ + mozilla::Maybe raiiObjectNsCString; \ + if (SHOULD_CREATE_ALL_NONSENSITIVE_LABEL_FRAMES) { \ + autoCStr.emplace(nsCStr); \ + raiiObjectNsCString.emplace( \ + label, autoCStr->get(), JS::ProfilingCategoryPair::categoryPair, \ + uint32_t(js::ProfilingStackFrame::Flags::NONSENSITIVE)); \ + } + +// Similar to AUTO_PROFILER_LABEL_DYNAMIC_CSTR, but takes an nsString that is +// lossily converted to an ASCII string. +// +// Note: The use of the Maybe<>s ensures the scopes for the converted dynamic +// string and the AutoProfilerLabel are appropriate, while also not incurring +// the runtime cost of the string conversion unless the profiler is active.
+// Therefore, unlike AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC_CSTR, +// this macro doesn't push/pop a label when the profiler is inactive. +#define AUTO_PROFILER_LABEL_DYNAMIC_LOSSY_NSSTRING(label, categoryPair, nsStr) \ + mozilla::Maybe asciiStr; \ + mozilla::Maybe raiiObjectLossyNsString; \ + if (profiler_is_active()) { \ + asciiStr.emplace(nsStr); \ + raiiObjectLossyNsString.emplace(label, asciiStr->get(), \ + JS::ProfilingCategoryPair::categoryPair); \ + } + +// Similar to AUTO_PROFILER_LABEL, but accepting a JSContext* parameter, and a +// no-op if the profiler is disabled. +// Used to annotate functions for which overhead in the range of nanoseconds is +// noticeable. It avoids overhead from the TLS lookup because it can get the +// ProfilingStack from the JS context, and avoids almost all overhead in the +// case where the profiler is disabled. +#define AUTO_PROFILER_LABEL_FAST(label, categoryPair, ctx) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + ctx, label, nullptr, JS::ProfilingCategoryPair::categoryPair) + +// Similar to AUTO_PROFILER_LABEL_FAST, but also takes an extra string and an +// additional set of flags. The flags parameter should carry values from the +// js::ProfilingStackFrame::Flags enum. +#define AUTO_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, categoryPair, \ + ctx, flags) \ + mozilla::AutoProfilerLabel PROFILER_RAII( \ + ctx, label, dynamicString, JS::ProfilingCategoryPair::categoryPair, \ + flags) + +namespace mozilla { + +#ifndef MOZ_GECKO_PROFILER + +class MOZ_RAII AutoProfilerLabel { + public: + // This is the AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC variant. + AutoProfilerLabel(const char* aLabel, const char* aDynamicString, + JS::ProfilingCategoryPair aCategoryPair, + uint32_t aFlags = 0) {} + + // This is the AUTO_PROFILER_LABEL_FAST variant. 
+ AutoProfilerLabel(JSContext* aJSContext, const char* aLabel, + const char* aDynamicString, + JS::ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {} + + ~AutoProfilerLabel() {} +}; + +#else // !MOZ_GECKO_PROFILER + +// This class creates a non-owning ProfilingStack reference. Objects of this +// class are stack-allocated, and so exist within a thread, and are thus bounded +// by the lifetime of the thread, which ensures that the references held can't +// be used after the ProfilingStack is destroyed. +class MOZ_RAII AutoProfilerLabel { + public: + // This is the AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC variant. + AutoProfilerLabel(const char* aLabel, const char* aDynamicString, + JS::ProfilingCategoryPair aCategoryPair, + uint32_t aFlags = 0) { + // Get the ProfilingStack from TLS. + ProfilingStack* profilingStack = + profiler::ThreadRegistration::WithOnThreadRefOr( + [](profiler::ThreadRegistration::OnThreadRef aThread) { + return &aThread.UnlockedConstReaderAndAtomicRWRef() + .ProfilingStackRef(); + }, + nullptr); + Push(profilingStack, aLabel, aDynamicString, aCategoryPair, aFlags); + } + + // This is the AUTO_PROFILER_LABEL_FAST variant. It retrieves the + // ProfilingStack from the JSContext and does nothing if the profiler is + // inactive. + AutoProfilerLabel(JSContext* aJSContext, const char* aLabel, + const char* aDynamicString, + JS::ProfilingCategoryPair aCategoryPair, uint32_t aFlags) { + Push(js::GetContextProfilingStackIfEnabled(aJSContext), aLabel, + aDynamicString, aCategoryPair, aFlags); + } + + void Push(ProfilingStack* aProfilingStack, const char* aLabel, + const char* aDynamicString, JS::ProfilingCategoryPair aCategoryPair, + uint32_t aFlags = 0) { + // This function runs both on and off the main thread. 
+ + mProfilingStack = aProfilingStack; + if (mProfilingStack) { + mProfilingStack->pushLabelFrame(aLabel, aDynamicString, this, + aCategoryPair, aFlags); + } + } + + ~AutoProfilerLabel() { + // This function runs both on and off the main thread. + + if (mProfilingStack) { + mProfilingStack->pop(); + } + } + + private: + // We save a ProfilingStack pointer in the ctor so we don't have to redo the + // TLS lookup in the dtor. + ProfilingStack* mProfilingStack; +}; + +#endif // !MOZ_GECKO_PROFILER + +} // namespace mozilla + +#endif // ProfilerLabels_h diff --git a/tools/profiler/public/ProfilerMarkerTypes.h b/tools/profiler/public/ProfilerMarkerTypes.h new file mode 100644 index 0000000000..0868c70e30 --- /dev/null +++ b/tools/profiler/public/ProfilerMarkerTypes.h @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerMarkerTypes_h +#define ProfilerMarkerTypes_h + +// This header contains common marker type definitions that rely on xpcom. +// +// It #include's "mozilla/BaseProfilerMarkerTypes.h" and "ProfilerMarkers.h", +// see these files for more marker types, how to define other marker types, and +// how to add markers to the profiler buffers. + +// !!! /!\ WORK IN PROGRESS /!\ !!! +// This file contains draft marker definitions, but most are not used yet. +// Further work is needed to complete these definitions, and use them to convert +// existing PROFILER_ADD_MARKER calls. See meta bug 1661394.
+ +#include "mozilla/BaseProfilerMarkerTypes.h" +#include "mozilla/ProfilerMarkers.h" +#include "js/ProfilingFrameIterator.h" +#include "js/Utility.h" +#include "mozilla/Preferences.h" +#include "mozilla/ServoTraversalStatistics.h" + +namespace geckoprofiler::markers { + +// Import some common markers from mozilla::baseprofiler::markers. +using MediaSampleMarker = mozilla::baseprofiler::markers::MediaSampleMarker; +using VideoFallingBehindMarker = + mozilla::baseprofiler::markers::VideoFallingBehindMarker; +using ContentBuildMarker = mozilla::baseprofiler::markers::ContentBuildMarker; +using MediaEngineMarker = mozilla::baseprofiler::markers::MediaEngineMarker; +using MediaEngineTextMarker = + mozilla::baseprofiler::markers::MediaEngineTextMarker; + +} // namespace geckoprofiler::markers + +#endif // ProfilerMarkerTypes_h diff --git a/tools/profiler/public/ProfilerMarkers.h b/tools/profiler/public/ProfilerMarkers.h new file mode 100644 index 0000000000..ca53c3f189 --- /dev/null +++ b/tools/profiler/public/ProfilerMarkers.h @@ -0,0 +1,355 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Markers are useful to delimit something important happening such as the first +// paint. Unlike labels, which are only recorded in the profile buffer if a +// sample is collected while the label is on the label stack, markers will +// always be recorded in the profile buffer. +// +// This header contains definitions necessary to add markers to the Gecko +// Profiler buffer. +// +// It #include's "mozilla/BaseProfilerMarkers.h", see that header for base +// definitions necessary to create marker types. +// +// If common marker types are needed, #include "ProfilerMarkerTypes.h" instead. 
+// +// But if you want to create your own marker type locally, you can #include this +// header only; look at ProfilerMarkerTypes.h for examples of how to define +// types. +// +// To then record markers: +// - Use `baseprofiler::AddMarker(...)` from mozglue or other libraries that are +// outside of xul, especially if they may happen outside of xpcom's lifetime +// (typically startup, shutdown, or tests). +// - Otherwise #include "ProfilerMarkers.h" instead, and use +// `profiler_add_marker(...)`. +// See these functions for more details. + +#ifndef ProfilerMarkers_h +#define ProfilerMarkers_h + +#include "mozilla/BaseProfilerMarkers.h" +#include "mozilla/ProfilerMarkersDetail.h" +#include "mozilla/ProfilerLabels.h" +#include "nsJSUtils.h" // for nsJSUtils::GetCurrentlyRunningCodeInnerWindowID + +class nsIDocShell; + +namespace geckoprofiler::markers::detail { +// Please do not use anything from the detail namespace outside the profiler. + +#ifdef MOZ_GECKO_PROFILER +mozilla::Maybe profiler_get_inner_window_id_from_docshell( + nsIDocShell* aDocshell); +#else +inline mozilla::Maybe profiler_get_inner_window_id_from_docshell( + nsIDocShell* aDocshell) { + return mozilla::Nothing(); +} +#endif // MOZ_GECKO_PROFILER + +} // namespace geckoprofiler::markers::detail + +// This is a helper function to get the Inner Window ID from DocShell but it's +// not a recommended method to get it and it's not encouraged to use this +// function. If there is a computed inner window ID, `window`, or `Document` +// available in the call site, please use them. Use this function as a last +// resort. 
+inline mozilla::MarkerInnerWindowId MarkerInnerWindowIdFromDocShell( + nsIDocShell* aDocshell) { + mozilla::Maybe id = geckoprofiler::markers::detail:: + profiler_get_inner_window_id_from_docshell(aDocshell); + if (!id) { + return mozilla::MarkerInnerWindowId::NoId(); + } + return mozilla::MarkerInnerWindowId(*id); +} + +// This is a helper function to get the Inner Window ID from a JS Context but +// it's not a recommended method to get it and it's not encouraged to use this +// function. If there is a computed inner window ID, `window`, or `Document` +// available in the call site, please use them. Use this function as a last +// resort. +inline mozilla::MarkerInnerWindowId MarkerInnerWindowIdFromJSContext( + JSContext* aContext) { + return mozilla::MarkerInnerWindowId( + nsJSUtils::GetCurrentlyRunningCodeInnerWindowID(aContext)); +} + +// Bring category names from Base Profiler into the geckoprofiler::category +// namespace, for consistency with other Gecko Profiler identifiers. +namespace geckoprofiler::category { +using namespace ::mozilla::baseprofiler::category; +} + +#ifdef MOZ_GECKO_PROFILER +// Forward-declaration. TODO: Move to more common header, see bug 1681416. +bool profiler_capture_backtrace_into( + mozilla::ProfileChunkedBuffer& aChunkedBuffer, + mozilla::StackCaptureOptions aCaptureOptions); + +// Add a marker to a given buffer. `AddMarker()` and related macros should be +// used in most cases, see below for more information about them and the +// parameters. This function may be useful when markers need to be recorded in a +// local buffer outside of the main profiler buffer. +template +mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer& aBuffer, + const mozilla::ProfilerString8View& aName, + const mozilla::MarkerCategory& aCategory, mozilla::MarkerOptions&& aOptions, + MarkerType aMarkerType, const PayloadArguments&...
aPayloadArguments) { + AUTO_PROFILER_LABEL("AddMarkerToBuffer", PROFILER); + mozilla::Unused << aMarkerType; // Only the empty object type is useful. + return mozilla::base_profiler_markers_detail::AddMarkerToBuffer( + aBuffer, aName, aCategory, std::move(aOptions), + profiler_active_without_feature(ProfilerFeature::NoMarkerStacks) + ? ::profiler_capture_backtrace_into + : nullptr, + aPayloadArguments...); +} + +// Add a marker (without payload) to a given buffer. +inline mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer& aBuffer, + const mozilla::ProfilerString8View& aName, + const mozilla::MarkerCategory& aCategory, + mozilla::MarkerOptions&& aOptions = {}) { + return AddMarkerToBuffer(aBuffer, aName, aCategory, std::move(aOptions), + mozilla::baseprofiler::markers::NoPayload{}); +} +#endif + +[[nodiscard]] inline bool profiler_thread_is_being_profiled_for_markers() { + return profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers); +} + +[[nodiscard]] inline bool profiler_thread_is_being_profiled_for_markers( + const ProfilerThreadId& aThreadId) { + return profiler_thread_is_being_profiled(aThreadId, + ThreadProfilingFeatures::Markers); +} + +// Add a marker to the Gecko Profiler buffer. +// - aName: Main name of this marker. +// - aCategory: Category for this marker. +// - aOptions: Optional settings (such as timing, inner window id, +// backtrace...), see `MarkerOptions` for details. +// - aMarkerType: Empty object that specifies the type of marker. +// - aPayloadArguments: Arguments expected by this marker type's +// ` StreamJSONMarkerData` function. +template +mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View& aName, + const mozilla::MarkerCategory& aCategory, mozilla::MarkerOptions&& aOptions, + MarkerType aMarkerType, const PayloadArguments&... 
aPayloadArguments) { +#ifndef MOZ_GECKO_PROFILER + return {}; +#else + if (!profiler_thread_is_being_profiled_for_markers( + aOptions.ThreadId().ThreadId())) { + return {}; + } + AUTO_PROFILER_LABEL("profiler_add_marker", PROFILER); + return ::AddMarkerToBuffer(profiler_get_core_buffer(), aName, aCategory, + std::move(aOptions), aMarkerType, + aPayloadArguments...); +#endif +} + +// Add a marker (without payload) to the Gecko Profiler buffer. +inline mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View& aName, + const mozilla::MarkerCategory& aCategory, + mozilla::MarkerOptions&& aOptions = {}) { + return profiler_add_marker(aName, aCategory, std::move(aOptions), + mozilla::baseprofiler::markers::NoPayload{}); +} + +// Same as `profiler_add_marker()` (without payload). This macro is safe to use +// even if MOZ_GECKO_PROFILER is not #defined. +#define PROFILER_MARKER_UNTYPED(markerName, categoryName, ...) \ + do { \ + AUTO_PROFILER_STATS(PROFILER_MARKER_UNTYPED); \ + ::profiler_add_marker(markerName, ::geckoprofiler::category::categoryName, \ + ##__VA_ARGS__); \ + } while (false) + +// Same as `profiler_add_marker()` (with payload). This macro is safe to use +// even if MOZ_GECKO_PROFILER is not #defined. +#define PROFILER_MARKER(markerName, categoryName, options, MarkerType, ...) \ + do { \ + AUTO_PROFILER_STATS(PROFILER_MARKER_with_##MarkerType); \ + ::profiler_add_marker(markerName, ::geckoprofiler::category::categoryName, \ + options, ::geckoprofiler::markers::MarkerType{}, \ + ##__VA_ARGS__); \ + } while (false) + +namespace geckoprofiler::markers { +// Most common marker types. Others are in ProfilerMarkerTypes.h. +using TextMarker = ::mozilla::baseprofiler::markers::TextMarker; +using Tracing = mozilla::baseprofiler::markers::Tracing; +} // namespace geckoprofiler::markers + +// Add a text marker. This macro is safe to use even if MOZ_GECKO_PROFILER is +// not #defined. 
+#define PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \ + do { \ + AUTO_PROFILER_STATS(PROFILER_MARKER_TEXT); \ + ::profiler_add_marker(markerName, ::geckoprofiler::category::categoryName, \ + options, ::geckoprofiler::markers::TextMarker{}, \ + text); \ + } while (false) + +// RAII object that adds a PROFILER_MARKER_TEXT when destroyed; the marker's +// timing will be the interval from construction (unless an instant or start +// time is already specified in the provided options) until destruction. +class MOZ_RAII AutoProfilerTextMarker { + public: + AutoProfilerTextMarker(const char* aMarkerName, + const mozilla::MarkerCategory& aCategory, + mozilla::MarkerOptions&& aOptions, + const nsACString& aText) + : mMarkerName(aMarkerName), + mCategory(aCategory), + mOptions(std::move(aOptions)), + mText(aText) { + MOZ_ASSERT(mOptions.Timing().EndTime().IsNull(), + "AutoProfilerTextMarker options shouldn't have an end time"); + if (profiler_is_active_and_unpaused() && + mOptions.Timing().StartTime().IsNull()) { + mOptions.Set(mozilla::MarkerTiming::InstantNow()); + } + } + + ~AutoProfilerTextMarker() { + if (profiler_is_active_and_unpaused()) { + AUTO_PROFILER_LABEL("TextMarker", PROFILER); + mOptions.TimingRef().SetIntervalEnd(); + AUTO_PROFILER_STATS(AUTO_PROFILER_MARKER_TEXT); + profiler_add_marker( + mozilla::ProfilerString8View::WrapNullTerminatedString(mMarkerName), + mCategory, std::move(mOptions), geckoprofiler::markers::TextMarker{}, + mText); + } + } + + protected: + const char* mMarkerName; + mozilla::MarkerCategory mCategory; + mozilla::MarkerOptions mOptions; + nsCString mText; +}; + +// Creates an AutoProfilerTextMarker RAII object. This macro is safe to use +// even if MOZ_GECKO_PROFILER is not #defined. 
+#define AUTO_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \ + AutoProfilerTextMarker PROFILER_RAII( \ + markerName, ::mozilla::baseprofiler::category::categoryName, options, \ + text) + +class MOZ_RAII AutoProfilerTracing { + public: + AutoProfilerTracing(const char* aCategoryString, const char* aMarkerName, + mozilla::MarkerCategory aCategoryPair, + const mozilla::Maybe& aInnerWindowID) + : mCategoryString(aCategoryString), + mMarkerName(aMarkerName), + mCategoryPair(aCategoryPair), + mInnerWindowID(aInnerWindowID) { + profiler_add_marker( + mozilla::ProfilerString8View::WrapNullTerminatedString(mMarkerName), + mCategoryPair, + {mozilla::MarkerTiming::IntervalStart(), + mozilla::MarkerInnerWindowId(mInnerWindowID)}, + geckoprofiler::markers::Tracing{}, + mozilla::ProfilerString8View::WrapNullTerminatedString( + mCategoryString)); + } + + AutoProfilerTracing( + const char* aCategoryString, const char* aMarkerName, + mozilla::MarkerCategory aCategoryPair, + mozilla::UniquePtr aBacktrace, + const mozilla::Maybe& aInnerWindowID) + : mCategoryString(aCategoryString), + mMarkerName(aMarkerName), + mCategoryPair(aCategoryPair), + mInnerWindowID(aInnerWindowID) { + profiler_add_marker( + mozilla::ProfilerString8View::WrapNullTerminatedString(mMarkerName), + mCategoryPair, + {mozilla::MarkerTiming::IntervalStart(), + mozilla::MarkerInnerWindowId(mInnerWindowID), + mozilla::MarkerStack::TakeBacktrace(std::move(aBacktrace))}, + geckoprofiler::markers::Tracing{}, + mozilla::ProfilerString8View::WrapNullTerminatedString( + mCategoryString)); + } + + ~AutoProfilerTracing() { + profiler_add_marker( + mozilla::ProfilerString8View::WrapNullTerminatedString(mMarkerName), + mCategoryPair, + {mozilla::MarkerTiming::IntervalEnd(), + mozilla::MarkerInnerWindowId(mInnerWindowID)}, + geckoprofiler::markers::Tracing{}, + mozilla::ProfilerString8View::WrapNullTerminatedString( + mCategoryString)); + } + + protected: + const char* mCategoryString; + const char* 
mMarkerName; + const mozilla::MarkerCategory mCategoryPair; + const mozilla::Maybe mInnerWindowID; +}; + +// Adds a START/END pair of tracing markers. +#define AUTO_PROFILER_TRACING_MARKER(categoryString, markerName, categoryPair) \ + AutoProfilerTracing PROFILER_RAII(categoryString, markerName, \ + geckoprofiler::category::categoryPair, \ + mozilla::Nothing()) +#define AUTO_PROFILER_TRACING_MARKER_INNERWINDOWID( \ + categoryString, markerName, categoryPair, innerWindowId) \ + AutoProfilerTracing PROFILER_RAII(categoryString, markerName, \ + geckoprofiler::category::categoryPair, \ + mozilla::Some(innerWindowId)) +#define AUTO_PROFILER_TRACING_MARKER_DOCSHELL(categoryString, markerName, \ + categoryPair, docShell) \ + AutoProfilerTracing PROFILER_RAII( \ + categoryString, markerName, geckoprofiler::category::categoryPair, \ + geckoprofiler::markers::detail:: \ + profiler_get_inner_window_id_from_docshell(docShell)) + +#ifdef MOZ_GECKO_PROFILER +extern template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&, + const mozilla::MarkerCategory&, mozilla::MarkerOptions&&, + mozilla::baseprofiler::markers::NoPayload); + +extern template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&, + const mozilla::MarkerCategory&, mozilla::MarkerOptions&&, + mozilla::baseprofiler::markers::TextMarker, const std::string&); + +extern template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker, + const std::string&); + +extern template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker, + const nsCString&); + +extern template 
mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::Tracing, + const mozilla::ProfilerString8View&); +#endif // MOZ_GECKO_PROFILER + +#endif // ProfilerMarkers_h diff --git a/tools/profiler/public/ProfilerMarkersDetail.h b/tools/profiler/public/ProfilerMarkersDetail.h new file mode 100644 index 0000000000..2308a14bb2 --- /dev/null +++ b/tools/profiler/public/ProfilerMarkersDetail.h @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerMarkersDetail_h +#define ProfilerMarkersDetail_h + +#ifndef ProfilerMarkers_h +# error "This header should only be #included by ProfilerMarkers.h" +#endif + +#include "mozilla/ProfilerMarkersPrerequisites.h" + +#ifdef MOZ_GECKO_PROFILER + +// ~~ HERE BE DRAGONS ~~ +// +// Everything below is internal implementation detail, you shouldn't need to +// look at it unless working on the profiler code. + +// Header that specializes the (de)serializers for xpcom types. 
+# include "mozilla/ProfileBufferEntrySerializationGeckoExtensions.h" + +// Implemented in platform.cpp +mozilla::ProfileChunkedBuffer& profiler_get_core_buffer(); + +#endif // MOZ_GECKO_PROFILER + +#endif // ProfilerMarkersDetail_h diff --git a/tools/profiler/public/ProfilerMarkersPrerequisites.h b/tools/profiler/public/ProfilerMarkersPrerequisites.h new file mode 100644 index 0000000000..0f10f7efe2 --- /dev/null +++ b/tools/profiler/public/ProfilerMarkersPrerequisites.h @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This header contains basic definitions required to create marker types, and +// to add markers to the profiler buffers. +// +// In most cases, #include "mozilla/ProfilerMarkers.h" instead, or +// #include "mozilla/ProfilerMarkerTypes.h" for common marker types. + +#ifndef ProfilerMarkersPrerequisites_h +#define ProfilerMarkersPrerequisites_h + +#include "mozilla/BaseProfilerMarkersPrerequisites.h" +#include "mozilla/ProfilerThreadState.h" + +#ifdef MOZ_GECKO_PROFILER + +namespace geckoprofiler::markers { + +// Default marker payload types, with no extra information, not even a marker +// type and payload. This is intended for label-only markers. 
+using NoPayload = ::mozilla::baseprofiler::markers::NoPayload; + +} // namespace geckoprofiler::markers + +#endif // MOZ_GECKO_PROFILER + +#endif // ProfilerMarkersPrerequisites_h diff --git a/tools/profiler/public/ProfilerParent.h b/tools/profiler/public/ProfilerParent.h new file mode 100644 index 0000000000..8bd5c71721 --- /dev/null +++ b/tools/profiler/public/ProfilerParent.h @@ -0,0 +1,119 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerParent_h +#define ProfilerParent_h + +#include "mozilla/PProfilerParent.h" +#include "mozilla/RefPtr.h" + +class nsIProfilerStartParams; + +namespace mozilla { + +class ProfileBufferGlobalController; +class ProfilerParentTracker; + +// This is the main process side of the PProfiler protocol. +// ProfilerParent instances only exist on the main thread of the main process. +// The other side (ProfilerChild) lives on a background thread in the other +// process. +// The creation of PProfiler actors is initiated from the main process, after +// the other process has been launched. +// ProfilerParent instances are destroyed once the message channel closes, +// which can be triggered by either process, depending on which one shuts down +// first. +// All ProfilerParent instances are registered with a manager class called +// ProfilerParentTracker, which has the list of living ProfilerParent instances +// and handles shutdown. 
+class ProfilerParent final : public PProfilerParent { + public: + NS_INLINE_DECL_REFCOUNTING(ProfilerParent, final) + + static mozilla::ipc::Endpoint CreateForProcess( + base::ProcessId aOtherPid); + +#ifdef MOZ_GECKO_PROFILER + using SingleProcessProfilePromise = + MozPromise; + + struct SingleProcessProfilePromiseAndChildPid { + RefPtr profilePromise; + base::ProcessId childPid; + }; + + using SingleProcessProgressPromise = + MozPromise; + + // The following static methods can be called on any thread, but they are + // no-ops on anything other than the main thread. + // If called on the main thread, the call will be broadcast to all + // registered processes (all processes for which we have a ProfilerParent + // object). + // At the moment, the main process always calls these methods on the main + // thread, and that's the only process in which we need to forward these + // calls to other processes. The other processes will call these methods on + // the ProfilerChild background thread, but those processes don't need to + // forward these calls any further. + + // Returns the profiles to expect, as promises and child pids. + static nsTArray GatherProfiles(); + + // Send a request to get the GatherProfiles() progress update from one child + // process, returns a promise to be resolved with that progress. + // The promise RefPtr may be null if the child process is unknown. + // Progress may be invalid, if the request arrived after the child process + // had already responded to the main GatherProfile() IPC, or something went + // very wrong in that process. + static RefPtr RequestGatherProfileProgress( + base::ProcessId aChildPid); + + // This will start the profiler in all child processes. The returned promise + // will be resolved when all children have completed their operation + // (successfully or not).
+ [[nodiscard]] static RefPtr ProfilerStarted( + nsIProfilerStartParams* aParams); + static void ProfilerWillStopIfStarted(); + [[nodiscard]] static RefPtr ProfilerStopped(); + [[nodiscard]] static RefPtr ProfilerPaused(); + [[nodiscard]] static RefPtr ProfilerResumed(); + [[nodiscard]] static RefPtr ProfilerPausedSampling(); + [[nodiscard]] static RefPtr ProfilerResumedSampling(); + static void ClearAllPages(); + + [[nodiscard]] static RefPtr WaitOnePeriodicSampling(); + + // Create a "Final" update that the Child can return to its Parent. + static ProfileBufferChunkManagerUpdate MakeFinalUpdate(); + + // True if the ProfilerParent holds a lock on this thread. + static bool IsLockedOnCurrentThread(); + + private: + friend class ProfileBufferGlobalController; + friend class ProfilerParentTracker; + + explicit ProfilerParent(base::ProcessId aChildPid); + + void Init(); + void ActorDestroy(ActorDestroyReason aActorDestroyReason) override; + + void RequestChunkManagerUpdate(); + + base::ProcessId mChildPid; + nsTArray> + mPendingRequestedProfiles; + bool mDestroyed; +#endif // MOZ_GECKO_PROFILER + + private: + virtual ~ProfilerParent(); +}; + +} // namespace mozilla + +#endif // ProfilerParent_h diff --git a/tools/profiler/public/ProfilerRunnable.h b/tools/profiler/public/ProfilerRunnable.h new file mode 100644 index 0000000000..b3b4e64043 --- /dev/null +++ b/tools/profiler/public/ProfilerRunnable.h @@ -0,0 +1,68 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfilerRunnable_h +#define ProfilerRunnable_h + +#include "GeckoProfiler.h" +#include "nsIThreadPool.h" + +#if !defined(MOZ_GECKO_PROFILER) || !defined(MOZ_COLLECTING_RUNNABLE_TELEMETRY) +# define AUTO_PROFILE_FOLLOWING_RUNNABLE(runnable) +#else +# define AUTO_PROFILE_FOLLOWING_RUNNABLE(runnable) \ + mozilla::Maybe raiiRunnableMarker; \ + if (profiler_thread_is_being_profiled_for_markers()) { \ + raiiRunnableMarker.emplace(runnable); \ + } + +namespace mozilla { + +class MOZ_RAII AutoProfileRunnable { + public: + explicit AutoProfileRunnable(Runnable* aRunnable) + : mStartTime(TimeStamp::Now()) { + aRunnable->GetName(mName); + } + explicit AutoProfileRunnable(nsIRunnable* aRunnable) + : mStartTime(TimeStamp::Now()) { + nsCOMPtr threadPool = do_QueryInterface(aRunnable); + if (threadPool) { + // nsThreadPool::Run has its own call to AUTO_PROFILE_FOLLOWING_RUNNABLE, + // avoid nesting runnable markers. + return; + } + + nsCOMPtr named = do_QueryInterface(aRunnable); + if (named) { + named->GetName(mName); + } + } + explicit AutoProfileRunnable(nsACString& aName) + : mStartTime(TimeStamp::Now()), mName(aName) {} + + ~AutoProfileRunnable() { + if (mName.IsEmpty()) { + return; + } + + AUTO_PROFILER_LABEL("AutoProfileRunnable", PROFILER); + AUTO_PROFILER_STATS(AUTO_PROFILE_RUNNABLE); + profiler_add_marker("Runnable", ::mozilla::baseprofiler::category::OTHER, + MarkerTiming::IntervalUntilNowFrom(mStartTime), + geckoprofiler::markers::TextMarker{}, mName); + } + + protected: + TimeStamp mStartTime; + nsAutoCString mName; +}; + +} // namespace mozilla + +#endif + +#endif // ProfilerRunnable_h diff --git a/tools/profiler/public/ProfilerRustBindings.h b/tools/profiler/public/ProfilerRustBindings.h new file mode 100644 index 0000000000..bf290838a1 --- /dev/null +++ b/tools/profiler/public/ProfilerRustBindings.h @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#ifndef ProfilerRustBindings_h +#define ProfilerRustBindings_h + +#include "mozilla/profiler_ffi_generated.h" + +// Add any non-generated support code here + +#endif // ProfilerRustBindings_h diff --git a/tools/profiler/public/ProfilerState.h b/tools/profiler/public/ProfilerState.h new file mode 100644 index 0000000000..7a9f3f5c73 --- /dev/null +++ b/tools/profiler/public/ProfilerState.h @@ -0,0 +1,399 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This header contains most functions that give information about the Profiler: +// Whether it is active or not, paused, and the selected features. +// It is safe to include unconditionally, but uses of structs and functions must +// be guarded by `#ifdef MOZ_GECKO_PROFILER`. + +#ifndef ProfilerState_h +#define ProfilerState_h + +#include +#include +#include "mozilla/ProfilerUtils.h" + +#include + +//--------------------------------------------------------------------------- +// Profiler features +//--------------------------------------------------------------------------- + +#if defined(__APPLE__) && defined(__aarch64__) +# define POWER_HELP "Sample per process power use" +#elif defined(__APPLE__) && defined(__x86_64__) +# define POWER_HELP \ + "Record the power used by the entire system with each sample." +#elif defined(__linux__) && defined(__x86_64__) +# define POWER_HELP \ + "Record the power used by the entire system with each sample. " \ + "Only available with Intel CPUs and requires setting " \ + "the sysctl kernel.perf_event_paranoid to 0." 
+ +#elif defined(_MSC_VER) +# define POWER_HELP \ + "Record the value of every energy meter available on the system with " \ + "each sample. Only available on Windows 11 with Intel CPUs." +#else +# define POWER_HELP "Not supported on this platform." +#endif + +// Higher-order macro containing all the feature info in one place. Define +// |MACRO| appropriately to extract the relevant parts. Note that the number +// values are used internally only and so can be changed without consequence. +// Any changes to this list should also be applied to the feature list in +// toolkit/components/extensions/schemas/geckoProfiler.json. +// *** Synchronize with lists in BaseProfilerState.h and geckoProfiler.json *** +#define PROFILER_FOR_EACH_FEATURE(MACRO) \ + MACRO(0, "java", Java, "Profile Java code, Android only") \ + \ + MACRO(1, "js", JS, \ + "Get the JS engine to expose the JS stack to the profiler") \ + \ + MACRO(2, "mainthreadio", MainThreadIO, "Add main thread file I/O") \ + \ + MACRO(3, "fileio", FileIO, \ + "Add file I/O from all profiled threads, implies mainthreadio") \ + \ + MACRO(4, "fileioall", FileIOAll, \ + "Add file I/O from all threads, implies fileio") \ + \ + MACRO(5, "nomarkerstacks", NoMarkerStacks, \ + "Markers do not capture stacks, to reduce overhead") \ + \ + MACRO(6, "screenshots", Screenshots, \ + "Take a snapshot of the window on every composition") \ + \ + MACRO(7, "seqstyle", SequentialStyle, \ + "Disable parallel traversal in styling") \ + \ + MACRO(8, "stackwalk", StackWalk, \ + "Walk the C++ stack, not available on all platforms") \ + \ + MACRO(9, "jsallocations", JSAllocations, \ + "Have the JavaScript engine track allocations") \ + \ + MACRO(10, "nostacksampling", NoStackSampling, \ + "Disable all stack sampling: Cancels \"js\", \"stackwalk\" and " \ + "labels") \ + \ + MACRO(11, "nativeallocations", NativeAllocations, \ + "Collect the stacks from a smaller subset of all native " \ + "allocations, biasing towards collecting larger 
allocations") \ + \ + MACRO(12, "ipcmessages", IPCMessages, \ + "Have the IPC layer track cross-process messages") \ + \ + MACRO(13, "audiocallbacktracing", AudioCallbackTracing, \ + "Audio callback tracing") \ + \ + MACRO(14, "cpu", CPUUtilization, "CPU utilization") \ + \ + MACRO(15, "notimerresolutionchange", NoTimerResolutionChange, \ + "Do not adjust the timer resolution for sampling, so that other " \ + "Firefox timers do not get affected") \ + \ + MACRO(16, "cpuallthreads", CPUAllThreads, \ + "Sample the CPU utilization of all registered threads") \ + \ + MACRO(17, "samplingallthreads", SamplingAllThreads, \ + "Sample the stacks of all registered threads") \ + \ + MACRO(18, "markersallthreads", MarkersAllThreads, \ + "Record markers from all registered threads") \ + \ + MACRO(19, "unregisteredthreads", UnregisteredThreads, \ + "Discover and profile unregistered threads -- beware: expensive!") \ + \ + MACRO(20, "processcpu", ProcessCPU, \ + "Sample the CPU utilization of each process") \ + \ + MACRO(21, "power", Power, POWER_HELP) +// *** Synchronize with lists in BaseProfilerState.h and geckoProfiler.json *** + +struct ProfilerFeature { +#define DECLARE(n_, str_, Name_, desc_) \ + static constexpr uint32_t Name_ = (1u << n_); \ + [[nodiscard]] static constexpr bool Has##Name_(uint32_t aFeatures) { \ + return aFeatures & Name_; \ + } \ + static constexpr void Set##Name_(uint32_t& aFeatures) { \ + aFeatures |= Name_; \ + } \ + static constexpr void Clear##Name_(uint32_t& aFeatures) { \ + aFeatures &= ~Name_; \ + } + + // Define a bitfield constant, a getter, and two setters for each feature. + PROFILER_FOR_EACH_FEATURE(DECLARE) + +#undef DECLARE +}; + +// clang-format off +MOZ_DEFINE_ENUM_CLASS(ProfilingState,( + // A callback will be invoked ... + AlreadyActive, // if the profiler is active when the callback is added. + RemovingCallback, // when the callback is removed. + Started, // after the profiler has started. + Pausing, // before the profiler is paused. 
+ Resumed, // after the profiler has resumed. + GeneratingProfile, // before a profile is created. + Stopping, // before the profiler stops (unless restarting afterward). + ShuttingDown // before the profiler is shut down. +)); +// clang-format on + +[[nodiscard]] inline static const char* ProfilingStateToString( + ProfilingState aProfilingState) { + switch (aProfilingState) { + case ProfilingState::AlreadyActive: + return "Profiler already active"; + case ProfilingState::RemovingCallback: + return "Callback being removed"; + case ProfilingState::Started: + return "Profiler started"; + case ProfilingState::Pausing: + return "Profiler pausing"; + case ProfilingState::Resumed: + return "Profiler resumed"; + case ProfilingState::GeneratingProfile: + return "Generating profile"; + case ProfilingState::Stopping: + return "Profiler stopping"; + case ProfilingState::ShuttingDown: + return "Profiler shutting down"; + default: + MOZ_ASSERT_UNREACHABLE("Unexpected ProfilingState enum value"); + return "?"; + } +} + +using ProfilingStateSet = mozilla::EnumSet; + +[[nodiscard]] constexpr ProfilingStateSet AllProfilingStates() { + ProfilingStateSet set; + using Value = std::underlying_type_t; + for (Value stateValue = 0; + stateValue <= static_cast(kHighestProfilingState); ++stateValue) { + set += static_cast(stateValue); + } + return set; +} + +// Type of callbacks to be invoked at certain state changes. +// It must NOT call profiler_add/remove_state_change_callback(). 
+using ProfilingStateChangeCallback = std::function; + +#ifndef MOZ_GECKO_PROFILER + +[[nodiscard]] inline bool profiler_is_active() { return false; } +[[nodiscard]] inline bool profiler_is_active_and_unpaused() { return false; } +[[nodiscard]] inline bool profiler_feature_active(uint32_t aFeature) { + return false; +} +[[nodiscard]] inline bool profiler_is_locked_on_current_thread() { + return false; +} +inline void profiler_add_state_change_callback( + ProfilingStateSet aProfilingStateSet, + ProfilingStateChangeCallback&& aCallback, uintptr_t aUniqueIdentifier = 0) { +} +inline void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) { +} + +#else // !MOZ_GECKO_PROFILER + +# include "mozilla/Atomics.h" +# include "mozilla/Maybe.h" + +# include + +namespace mozilla::profiler::detail { + +// RacyFeatures is only defined in this header file so that its methods can +// be inlined into profiler_is_active(). Please do not use anything from the +// detail namespace outside the profiler. + +// Within the profiler's code, the preferred way to check profiler activeness +// and features is via ActivePS(). However, that requires locking gPSMutex. +// There are some hot operations where absolute precision isn't required, so we +// duplicate the activeness/feature state in a lock-free manner in this class. +class RacyFeatures { + public: + static void SetActive(uint32_t aFeatures) { + sActiveAndFeatures = Active | aFeatures; + } + + static void SetInactive() { sActiveAndFeatures = 0; } + + static void SetPaused() { sActiveAndFeatures |= Paused; } + + static void SetUnpaused() { sActiveAndFeatures &= ~Paused; } + + static void SetSamplingPaused() { sActiveAndFeatures |= SamplingPaused; } + + static void SetSamplingUnpaused() { sActiveAndFeatures &= ~SamplingPaused; } + + [[nodiscard]] static mozilla::Maybe FeaturesIfActive() { + if (uint32_t af = sActiveAndFeatures; af & Active) { + // Active, remove the Active&Paused bits to get all features. 
+ return Some(af & ~(Active | Paused | SamplingPaused)); + } + return Nothing(); + } + + [[nodiscard]] static mozilla::Maybe FeaturesIfActiveAndUnpaused() { + if (uint32_t af = sActiveAndFeatures; (af & (Active | Paused)) == Active) { + // Active but not fully paused, remove the Active and sampling-paused bits + // to get all features. + return Some(af & ~(Active | SamplingPaused)); + } + return Nothing(); + } + + // This implementation must be kept in sync with `gecko_profiler::is_active` + // in the Profiler Rust API. + [[nodiscard]] static bool IsActive() { + return uint32_t(sActiveAndFeatures) & Active; + } + + [[nodiscard]] static bool IsActiveWithFeature(uint32_t aFeature) { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && (af & aFeature); + } + + [[nodiscard]] static bool IsActiveWithoutFeature(uint32_t aFeature) { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & aFeature); + } + + // True if profiler is active, and not fully paused. + // Note that periodic sampling *could* be paused! + // This implementation must be kept in sync with + // `gecko_profiler::can_accept_markers` in the Profiler Rust API. + [[nodiscard]] static bool IsActiveAndUnpaused() { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & Paused); + } + + // True if profiler is active, and sampling is not paused (through generic + // `SetPaused()` or specific `SetSamplingPaused()`). + [[nodiscard]] static bool IsActiveAndSamplingUnpaused() { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & (Paused | SamplingPaused)); + } + + private: + static constexpr uint32_t Active = 1u << 31; + static constexpr uint32_t Paused = 1u << 30; + static constexpr uint32_t SamplingPaused = 1u << 29; + +// Ensure Active/Paused don't overlap with any of the feature bits.
+# define NO_OVERLAP(n_, str_, Name_, desc_) \ + static_assert(ProfilerFeature::Name_ != SamplingPaused, \ + "bad feature value"); + + PROFILER_FOR_EACH_FEATURE(NO_OVERLAP); + +# undef NO_OVERLAP + + // We combine the active bit with the feature bits so they can be read or + // written in a single atomic operation. Accesses to this atomic are not + // recorded by web replay as they may occur at non-deterministic points. + static mozilla::Atomic + sActiveAndFeatures; +}; + +} // namespace mozilla::profiler::detail + +//--------------------------------------------------------------------------- +// Get information from the profiler +//--------------------------------------------------------------------------- + +// Is the profiler active? Note: the return value of this function can become +// immediately out-of-date. E.g. the profile might be active but then +// profiler_stop() is called immediately afterward. One common and reasonable +// pattern of usage is the following: +// +// if (profiler_is_active()) { +// ExpensiveData expensiveData = CreateExpensiveData(); +// PROFILER_OPERATION(expensiveData); +// } +// +// where PROFILER_OPERATION is a no-op if the profiler is inactive. In this +// case the profiler_is_active() check is just an optimization -- it prevents +// us calling CreateExpensiveData() unnecessarily in most cases, but the +// expensive data will end up being created but not used if another thread +// stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION +// calls. +[[nodiscard]] inline bool profiler_is_active() { + return mozilla::profiler::detail::RacyFeatures::IsActive(); +} + +// Same as profiler_is_active(), but also checks if the profiler is not paused. +[[nodiscard]] inline bool profiler_is_active_and_unpaused() { + return mozilla::profiler::detail::RacyFeatures::IsActiveAndUnpaused(); +} + +// Is the profiler active and paused? Returns false if the profiler is inactive. 
+[[nodiscard]] bool profiler_is_paused(); + +// Is the profiler active and sampling is paused? Returns false if the profiler +// is inactive. +[[nodiscard]] bool profiler_is_sampling_paused(); + +// Get all the features supported by the profiler that are accepted by +// profiler_start(). The result is the same whether the profiler is active or +// not. +[[nodiscard]] uint32_t profiler_get_available_features(); + +// Returns the full feature set if the profiler is active. +// Note: the return value can become immediately out-of-date, much like the +// return value of profiler_is_active(). +[[nodiscard]] inline mozilla::Maybe profiler_features_if_active() { + return mozilla::profiler::detail::RacyFeatures::FeaturesIfActive(); +} + +// Returns the full feature set if the profiler is active and unpaused. +// Note: the return value can become immediately out-of-date, much like the +// return value of profiler_is_active(). +[[nodiscard]] inline mozilla::Maybe +profiler_features_if_active_and_unpaused() { + return mozilla::profiler::detail::RacyFeatures::FeaturesIfActiveAndUnpaused(); +} + +// Check if a profiler feature (specified via the ProfilerFeature type) is +// active. Returns false if the profiler is inactive. Note: the return value +// can become immediately out-of-date, much like the return value of +// profiler_is_active(). +[[nodiscard]] bool profiler_feature_active(uint32_t aFeature); + +// Check if the profiler is active without a feature (specified via the +// ProfilerFeature type). Note: the return value can become immediately +// out-of-date, much like the return value of profiler_is_active(). +[[nodiscard]] bool profiler_active_without_feature(uint32_t aFeature); + +// Returns true if any of the profiler mutexes are currently locked *on the +// current thread*. This may be used by re-entrant code that may call profiler +// functions while the same or a different profiler mutex is locked, which could +// deadlock.
+[[nodiscard]] bool profiler_is_locked_on_current_thread(); + +// Install a callback to be invoked at any of the given profiling state changes. +// An optional non-zero identifier may be given, to allow later removal of the +// callback, the caller is responsible for making sure it's really unique (e.g., +// by using a pointer to an object it owns.) +void profiler_add_state_change_callback( + ProfilingStateSet aProfilingStateSet, + ProfilingStateChangeCallback&& aCallback, uintptr_t aUniqueIdentifier = 0); + +// Remove the callback with the given non-zero identifier. +void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier); + +#endif // MOZ_GECKO_PROFILER + +#endif // ProfilerState_h diff --git a/tools/profiler/public/ProfilerThreadPlatformData.h b/tools/profiler/public/ProfilerThreadPlatformData.h new file mode 100644 index 0000000000..c243a8ee02 --- /dev/null +++ b/tools/profiler/public/ProfilerThreadPlatformData.h @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerThreadPlatformData_h +#define ProfilerThreadPlatformData_h + +#include "mozilla/ProfilerUtils.h" + +#if defined(__APPLE__) +# include +#elif defined(__linux__) || defined(__ANDROID__) || defined(__FreeBSD__) +# include "mozilla/Maybe.h" +# include +#endif + +namespace mozilla::profiler { + +class PlatformData { +#if (defined(_MSC_VER) || defined(__MINGW32__)) && defined(MOZ_GECKO_PROFILER) + public: + explicit PlatformData(ProfilerThreadId aThreadId); + ~PlatformData(); + + // Faking win32's HANDLE, because #including "windows.h" here causes trouble + // (e.g., it #defines `Yield` as nothing!) + // This type is static_check'ed against HANDLE in platform-win32.cpp. 
+ using WindowsHandle = void*; + WindowsHandle ProfiledThread() const { return mProfiledThread; } + + private: + WindowsHandle mProfiledThread; +#elif defined(__APPLE__) && defined(MOZ_GECKO_PROFILER) + public: + explicit PlatformData(ProfilerThreadId aThreadId); + ~PlatformData(); + thread_act_t ProfiledThread() const { return mProfiledThread; } + + private: + // Note: for mProfiledThread Mach primitives are used instead of pthread's + // because the latter doesn't provide thread manipulation primitives + // required. For details, consult "Mac OS X Internals" book, Section 7.3. + thread_act_t mProfiledThread; +#elif (defined(__linux__) || defined(__ANDROID__) || defined(__FreeBSD__)) && \ + defined(MOZ_GECKO_PROFILER) + public: + explicit PlatformData(ProfilerThreadId aThreadId); + ~PlatformData(); + // Clock Id for this profiled thread. `Nothing` if `pthread_getcpuclockid` + // failed (e.g., if the system doesn't support per-thread clocks). + Maybe GetClockId() const { return mClockId; } + + private: + Maybe mClockId; +#else + public: + explicit PlatformData(ProfilerThreadId aThreadId) {} +#endif +}; + +/** + * Return the number of nanoseconds of CPU time used since thread start. + * + * @return true on success. 
+ */ +#if defined(MOZ_GECKO_PROFILER) +bool GetCpuTimeSinceThreadStartInNs(uint64_t* aResult, + const PlatformData& aPlatformData); +#else +static inline bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const PlatformData& aPlatformData) { + return false; +} +#endif + +} // namespace mozilla::profiler + +#endif // ProfilerThreadPlatformData_h diff --git a/tools/profiler/public/ProfilerThreadRegistration.h b/tools/profiler/public/ProfilerThreadRegistration.h new file mode 100644 index 0000000000..3fb931987d --- /dev/null +++ b/tools/profiler/public/ProfilerThreadRegistration.h @@ -0,0 +1,367 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerThreadRegistration_h +#define ProfilerThreadRegistration_h + +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/ProfilerThreadRegistrationData.h" +#include "mozilla/ThreadLocal.h" + +namespace mozilla::profiler { + +class ThreadRegistry; + +// To use as RAII object, or through RegisterThread/UnregisterThread. +// Automatically registers itself with TLS and Profiler. +// It can be safely nested, but nested instances are just ignored. +// See Get.../With... functions for how to access the data. +class ThreadRegistration { + private: + using DataMutex = baseprofiler::detail::BaseProfilerMutex; + using DataLock = baseprofiler::detail::BaseProfilerAutoLock; + + public: + // Constructor to use as RAII auto-registration object. + // It stores itself in the TLS (its effective owner), and gives its pointer to + // the Profiler. 
+ ThreadRegistration(const char* aName, const void* aStackTop); + + // Destruction reverses construction: Remove pointer from the Profiler (except + // for the main thread, because it should be done by the profiler itself) and + // from the TLS. + ~ThreadRegistration(); + + // Manual construction&destruction, if RAII is not possible or too expensive + // in stack space. + // RegisterThread() *must* be paired with exactly one UnregisterThread() on + // the same thread. (Extra UnregisterThread() calls are handled safely, but + // they may cause profiling of this thread to stop earlier than expected.) + static ProfilingStack* RegisterThread(const char* aName, + const void* aStackTop); + static void UnregisterThread(); + + [[nodiscard]] static bool IsRegistered() { return GetFromTLS(); } + + // Prevent copies&moves. + ThreadRegistration(const ThreadRegistration&) = delete; + ThreadRegistration& operator=(const ThreadRegistration&) = delete; + + // Aliases to data accessors (removing the ThreadRegistration prefix). + + using UnlockedConstReader = ThreadRegistrationUnlockedConstReader; + using UnlockedConstReaderAndAtomicRW = + ThreadRegistrationUnlockedConstReaderAndAtomicRW; + using UnlockedRWForLockedProfiler = + ThreadRegistrationUnlockedRWForLockedProfiler; + using UnlockedReaderAndAtomicRWOnThread = + ThreadRegistrationUnlockedReaderAndAtomicRWOnThread; + using LockedRWFromAnyThread = ThreadRegistrationLockedRWFromAnyThread; + using LockedRWOnThread = ThreadRegistrationLockedRWOnThread; + + // On-thread access from the TLS, providing the following data accessors: + // UnlockedConstReader, UnlockedConstReaderAndAtomicRW, + // UnlockedRWForLockedProfiler, UnlockedReaderAndAtomicRWOnThread, and + // LockedRWOnThread. + // (See ThreadRegistry class for OFF-thread access.) + + // Reference-like class pointing at the ThreadRegistration for the current + // thread. 
+ class OnThreadRef { + public: + // const UnlockedConstReader + + [[nodiscard]] const UnlockedConstReader& UnlockedConstReaderCRef() const { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedConstReader(F&& aF) const { + return std::forward(aF)(UnlockedConstReaderCRef()); + } + + // const UnlockedConstReaderAndAtomicRW + + [[nodiscard]] const UnlockedConstReaderAndAtomicRW& + UnlockedConstReaderAndAtomicRWCRef() const { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedConstReaderAndAtomicRW(F&& aF) const { + return std::forward(aF)(UnlockedConstReaderAndAtomicRWCRef()); + } + + // UnlockedConstReaderAndAtomicRW + + [[nodiscard]] UnlockedConstReaderAndAtomicRW& + UnlockedConstReaderAndAtomicRWRef() { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedConstReaderAndAtomicRW(F&& aF) { + return std::forward(aF)(UnlockedConstReaderAndAtomicRWRef()); + } + + // const UnlockedRWForLockedProfiler + + [[nodiscard]] const UnlockedRWForLockedProfiler& + UnlockedRWForLockedProfilerCRef() const { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedRWForLockedProfiler(F&& aF) const { + return std::forward(aF)(UnlockedRWForLockedProfilerCRef()); + } + + // UnlockedRWForLockedProfiler + + [[nodiscard]] UnlockedRWForLockedProfiler& + UnlockedRWForLockedProfilerRef() { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedRWForLockedProfiler(F&& aF) { + return std::forward(aF)(UnlockedRWForLockedProfilerRef()); + } + + // const UnlockedReaderAndAtomicRWOnThread + + [[nodiscard]] const UnlockedReaderAndAtomicRWOnThread& + UnlockedReaderAndAtomicRWOnThreadCRef() const { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedReaderAndAtomicRWOnThread(F&& aF) const { + return std::forward(aF)(UnlockedReaderAndAtomicRWOnThreadCRef()); + } + + // UnlockedReaderAndAtomicRWOnThread + + [[nodiscard]] UnlockedReaderAndAtomicRWOnThread& + 
UnlockedReaderAndAtomicRWOnThreadRef() { + return mThreadRegistration->mData; + } + + template + auto WithUnlockedReaderAndAtomicRWOnThread(F&& aF) { + return std::forward(aF)(UnlockedReaderAndAtomicRWOnThreadRef()); + } + + // const LockedRWOnThread through ConstRWOnThreadWithLock + + // Locking order: Profiler, ThreadRegistry, ThreadRegistration. + class ConstRWOnThreadWithLock { + public: + [[nodiscard]] const LockedRWOnThread& DataCRef() const { + return mLockedRWOnThread; + } + [[nodiscard]] const LockedRWOnThread* operator->() const { + return &mLockedRWOnThread; + } + + private: + friend class OnThreadRef; + ConstRWOnThreadWithLock(const LockedRWOnThread& aLockedRWOnThread, + DataMutex& aDataMutex) + : mLockedRWOnThread(aLockedRWOnThread), mDataLock(aDataMutex) {} + + const LockedRWOnThread& mLockedRWOnThread; + DataLock mDataLock; + }; + + [[nodiscard]] ConstRWOnThreadWithLock ConstLockedRWOnThread() const { + return ConstRWOnThreadWithLock{mThreadRegistration->mData, + mThreadRegistration->mDataMutex}; + } + + template + auto WithConstLockedRWOnThread(F&& aF) const { + ConstRWOnThreadWithLock lockedData = ConstLockedRWOnThread(); + return std::forward(aF)(lockedData.DataCRef()); + } + + // LockedRWOnThread through RWOnThreadWithLock + + // Locking order: Profiler, ThreadRegistry, ThreadRegistration. 
+ class RWOnThreadWithLock { + public: + [[nodiscard]] const LockedRWOnThread& DataCRef() const { + return mLockedRWOnThread; + } + [[nodiscard]] LockedRWOnThread& DataRef() { return mLockedRWOnThread; } + [[nodiscard]] const LockedRWOnThread* operator->() const { + return &mLockedRWOnThread; + } + [[nodiscard]] LockedRWOnThread* operator->() { + return &mLockedRWOnThread; + } + + private: + friend class OnThreadRef; + RWOnThreadWithLock(LockedRWOnThread& aLockedRWOnThread, + DataMutex& aDataMutex) + : mLockedRWOnThread(aLockedRWOnThread), mDataLock(aDataMutex) {} + + LockedRWOnThread& mLockedRWOnThread; + DataLock mDataLock; + }; + + [[nodiscard]] RWOnThreadWithLock GetLockedRWOnThread() { + return RWOnThreadWithLock{mThreadRegistration->mData, + mThreadRegistration->mDataMutex}; + } + + template + auto WithLockedRWOnThread(F&& aF) { + RWOnThreadWithLock lockedData = GetLockedRWOnThread(); + return std::forward(aF)(lockedData.DataRef()); + } + + // This is needed to allow OnThreadPtr::operator-> to return a temporary + // OnThreadRef object, for which `->` must work; Here it provides a pointer + // to itself, so that the next follow-up `->` will work as member accessor. + OnThreadRef* operator->() && { return this; } + + private: + // Only ThreadRegistration should construct an OnThreadRef. + friend class ThreadRegistration; + explicit OnThreadRef(ThreadRegistration& aThreadRegistration) + : mThreadRegistration(&aThreadRegistration) {} + + // Allow ThreadRegistry to read mThreadRegistration. + friend class ThreadRegistry; + + // Guaranteed to be non-null by construction from a reference. + ThreadRegistration* mThreadRegistration; + }; + + // Pointer-like class pointing at the ThreadRegistration for the current + // thread, if one was registered. + class OnThreadPtr { + public: + [[nodiscard]] explicit operator bool() const { return mThreadRegistration; } + + // Note that this resolves to a temporary OnThreadRef object, which has all + // the allowed data accessors.
+ [[nodiscard]] OnThreadRef operator*() const { + MOZ_ASSERT(mThreadRegistration); + return OnThreadRef(*mThreadRegistration); + } + + // Note that this resolves to a temporary OnThreadRef object, which also + // overloads operator-> and has all the allowed data accessors. + [[nodiscard]] OnThreadRef operator->() const { + MOZ_ASSERT(mThreadRegistration); + return OnThreadRef(*mThreadRegistration); + } + + private: + friend class ThreadRegistration; + explicit OnThreadPtr(ThreadRegistration* aThreadRegistration) + : mThreadRegistration(aThreadRegistration) {} + + ThreadRegistration* mThreadRegistration; + }; + + [[nodiscard]] static OnThreadPtr GetOnThreadPtr() { + return OnThreadPtr{GetFromTLS()}; + } + + // Call `F(OnThreadRef)`. + template + static void WithOnThreadRef(F&& aF) { + const auto* tls = GetTLS(); + if (tls) { + ThreadRegistration* tr = tls->get(); + if (tr) { + std::forward(aF)(OnThreadRef{*tr}); + } + } + } + + // Call `F(OnThreadRef)`. + template + [[nodiscard]] static auto WithOnThreadRefOr(F&& aF, + FallbackReturn&& aFallbackReturn) + -> decltype(std::forward(aF)(std::declval())) { + const auto* tls = GetTLS(); + if (tls) { + ThreadRegistration* tr = tls->get(); + if (tr) { + return std::forward(aF)(OnThreadRef{*tr}); + } + } + return std::forward(aFallbackReturn); + } + + [[nodiscard]] static bool IsDataMutexLockedOnCurrentThread() { + if (const ThreadRegistration* tr = GetFromTLS(); tr) { + return tr->mDataMutex.IsLockedOnCurrentThread(); + } + return false; + } + + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + DataLock lock(mDataMutex); + return mData.SizeOfExcludingThis(aMallocSizeOf); + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + // aMallocSizeOf can only be used on heap-allocated objects. Stack + // allocations and static objects are not counted. + return (mIsOnHeap ?
aMallocSizeOf(this) : 0) + + SizeOfExcludingThis(aMallocSizeOf); + } + + private: + friend class ThreadRegistry; + + // This is what is embedded inside ThreadRegistration. + // References to sub-classes will be provided, to limit access as appropriate. + class EmbeddedData final : public LockedRWOnThread { + private: + // Only ThreadRegistration can construct (its embedded) `mData`. + friend class ThreadRegistration; + EmbeddedData(const char* aName, const void* aStackTop) + : LockedRWOnThread(aName, aStackTop) {} + }; + EmbeddedData mData; + + // Used when writing on self thread, and for any access from any thread. + // Locking order: Profiler, ThreadRegistry, ThreadRegistration. + mutable DataMutex mDataMutex; + + // In case of nested (non-RAII) registrations. Only accessed on thread. + int mOtherRegistrations = 0; + + // Set to true if allocated by `RegisterThread()`. Otherwise we assume that it + // is on the stack. + bool mIsOnHeap = false; + + // Only accessed by ThreadRegistry on this thread. + bool mIsRegistryLockedSharedOnThisThread = false; + + static MOZ_THREAD_LOCAL(ThreadRegistration*) tlsThreadRegistration; + + [[nodiscard]] static decltype(tlsThreadRegistration)* GetTLS() { + static const bool initialized = tlsThreadRegistration.init(); + return initialized ? &tlsThreadRegistration : nullptr; + } + + [[nodiscard]] static ThreadRegistration* GetFromTLS() { + const auto tls = GetTLS(); + return tls ? 
tls->get() : nullptr; + } +}; + +} // namespace mozilla::profiler + +#endif // ProfilerThreadRegistration_h diff --git a/tools/profiler/public/ProfilerThreadRegistrationData.h b/tools/profiler/public/ProfilerThreadRegistrationData.h new file mode 100644 index 0000000000..7c14290e4c --- /dev/null +++ b/tools/profiler/public/ProfilerThreadRegistrationData.h @@ -0,0 +1,537 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This header contains classes that hold data related to thread profiling: +// Data members are stored `protected` in `ThreadRegistrationData`. +// Non-virtual sub-classes of ThreadRegistrationData provide layers of +// public accessors to subsets of the data. Each level builds on the previous +// one and adds further access to more data, but always with the appropriate +// guards where necessary. +// These classes have protected constructors, so only some trusted classes +// `ThreadRegistration` and `ThreadRegistry` will be able to construct them, and +// then give limited access depending on who asks (the owning thread or another +// one), and how much data they actually need.
+// +// The hierarchy is, from base to most derived: +// - ThreadRegistrationData +// - ThreadRegistrationUnlockedConstReader +// - ThreadRegistrationUnlockedConstReaderAndAtomicRW +// - ThreadRegistrationUnlockedRWForLockedProfiler +// - ThreadRegistrationUnlockedReaderAndAtomicRWOnThread +// - ThreadRegistrationLockedRWFromAnyThread +// - ThreadRegistrationLockedRWOnThread +// - ThreadRegistration::EmbeddedData (actual data member in ThreadRegistration) +// +// Tech detail: These classes need to be a single hierarchy so that +// `ThreadRegistration` can contain the most-derived class, and from there can +// publish references to base classes without relying on Undefined Behavior. +// (It's not allowed to have some object and give a reference to a sub-class, +// unless that object was *really* constructed as that sub-class at least, even +// if that sub-class only adds member functions!) +// And where appropriate, these references will come along with the required +// lock. + +#ifndef ProfilerThreadRegistrationData_h +#define ProfilerThreadRegistrationData_h + +#include "js/ProfilingFrameIterator.h" +#include "js/ProfilingStack.h" +#include "mozilla/Atomics.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/ProfilerThreadPlatformData.h" +#include "mozilla/ProfilerThreadRegistrationInfo.h" +#include "nsCOMPtr.h" +#include "nsIThread.h" + +class ProfiledThreadData; +class PSAutoLock; +struct JSContext; + +// Enum listing which profiling features are active for a single thread. +enum class ThreadProfilingFeatures : uint32_t { + // The thread is not being profiled at all (either the profiler is not + // running, or this thread is not examined during profiling.) + NotProfiled = 0u, + + // Single features, binary exclusive. May be `Combine()`d. + CPUUtilization = 1u << 0, + Sampling = 1u << 1, + Markers = 1u << 2, + + // All possible features. Usually used as a mask to see if any feature is + // active at a given time. 
+ Any = CPUUtilization | Sampling | Markers +}; + +// Binary OR of one or more ThreadProfilingFeatures, to mix all arguments. +template +[[nodiscard]] constexpr ThreadProfilingFeatures Combine( + ThreadProfilingFeatures a1, Ts... as) { + static_assert((true && ... && + std::is_same_v>, + ThreadProfilingFeatures>)); + return static_cast( + (static_cast>(a1) | ... | + static_cast>(as))); +} + +// Binary AND of one or more ThreadProfilingFeatures, to find features common to +// all arguments. +template +[[nodiscard]] constexpr ThreadProfilingFeatures Intersect( + ThreadProfilingFeatures a1, Ts... as) { + static_assert((true && ... && + std::is_same_v>, + ThreadProfilingFeatures>)); + return static_cast( + (static_cast>(a1) & ... & + static_cast>(as))); +} + +// Are there features in common between the two given sets? +// Mostly useful to test if any of a set of features is present in another set. +template +[[nodiscard]] constexpr bool DoFeaturesIntersect(ThreadProfilingFeatures a1, + ThreadProfilingFeatures a2) { + return Intersect(a1, a2) != ThreadProfilingFeatures::NotProfiled; +} + +namespace mozilla::profiler { + +// All data members related to thread profiling are stored here. +// See derived classes below, which give limited unlocked/locked read/write +// access in different situations, and will be available through +// ThreadRegistration and ThreadRegistry. +class ThreadRegistrationData { + public: + // No public accessors here. See derived classes for accessors, and + // Get.../With... functions for who can use these accessors. + + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + // Not including data that is not fully owned here.
+ return 0; + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); + } + + static constexpr size_t MAX_JS_FRAMES = 1024; + using JsFrame = JS::ProfilingFrameIterator::Frame; + using JsFrameBuffer = JsFrame[MAX_JS_FRAMES]; + + // `protected` to allow derived classes to read all data members. + protected: + ThreadRegistrationData(const char* aName, const void* aStackTop); + +#ifdef DEBUG + // Destructor only used to check invariants. + ~ThreadRegistrationData() { + MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) == + !!mProfiledThreadData); + MOZ_ASSERT(!mProfiledThreadData, + "mProfiledThreadData pointer should have been reset before " + "~ThreadRegistrationData"); + } +#endif // DEBUG + + // Permanent thread information. + // Set at construction, read from anywhere, moved-from at destruction. + ThreadRegistrationInfo mInfo; + + // Contains profiler labels and JS frames. + // Deep-written on thread only, deep-read from thread and suspended thread. + ProfilingStack mProfilingStack; + + // In practice, only read from thread and suspended thread. + PlatformData mPlatformData; + + // Only read from thread and suspended thread. + const void* const mStackTop; + + // Written from thread, read from thread and suspended thread. + nsCOMPtr mThread; + + // If this is a JS thread, this is its JSContext, which is required for any + // JS sampling. + // Written from thread, read from thread and suspended thread. + JSContext* mJSContext = nullptr; + + // If mJSContext is not null AND the thread is being profiled, this points at + // the start of a JsFrameBuffer to be used for on-thread synchronous sampling. + JsFrame* mJsFrameBuffer = nullptr; + + // The profiler needs to start and stop JS sampling of JS threads at various + // times. However, the JS engine can only do the required actions on the + // JS thread itself ("on-thread"), not from another thread ("off-thread"). 
+ // Therefore, we have the following two-step process. + // + // - The profiler requests (on-thread or off-thread) that the JS sampling be + // started/stopped, by changing mJSSampling to the appropriate REQUESTED + // state. + // + // - The relevant JS thread polls (on-thread) for changes to mJSSampling. + // When it sees a REQUESTED state, it performs the appropriate actions to + // actually start/stop JS sampling, and changes mJSSampling out of the + // REQUESTED state. + // + // The state machine is as follows. + // + // INACTIVE --> ACTIVE_REQUESTED + // ^ ^ | + // | _/ | + // | _/ | + // | / | + // | v v + // INACTIVE_REQUESTED <-- ACTIVE + // + // The polling is done in the following two ways. + // + // - Via the interrupt callback mechanism; the JS thread must call + // profiler_js_interrupt_callback() from its own interrupt callback. + // This is how sampling must be started/stopped for threads where the + // request was made off-thread. + // + // - When {Start,Stop}JSSampling() is called on-thread, we can immediately + // follow it with a PollJSSampling() call to avoid the delay between the + // two steps. Likewise, setJSContext() calls PollJSSampling(). + // + // One non-obvious thing about all this: these JS sampling requests are made + // on all threads, even non-JS threads. mJSContext needs to also be set (via + // setJSContext(), which can only happen for JS threads) for any JS sampling + // to actually happen. + // + enum { + INACTIVE = 0, + ACTIVE_REQUESTED = 1, + ACTIVE = 2, + INACTIVE_REQUESTED = 3, + } mJSSampling = INACTIVE; + + uint32_t mJSFlags = 0; + + // Flags to conveniently track various JS instrumentations. + enum class JSInstrumentationFlags { + StackSampling = 0x1, + Allocations = 0x2, + }; + + [[nodiscard]] bool JSAllocationsEnabled() const { + return mJSFlags & uint32_t(JSInstrumentationFlags::Allocations); + } + + // The following members may be modified from another thread.
+ // They need to be atomic, because LockData() does not prevent reads from + // the owning thread. + + // mSleep tracks whether the thread is sleeping, and if so, whether it has + // been previously observed. This is used for an optimization: in some + // cases, when a thread is asleep, we duplicate the previous sample, which + // is cheaper than taking a new sample. + // + // mSleep is atomic because it is accessed from multiple threads. + // + // - It is written only by this thread, via SetSleeping() and SetAwake(). + // + // - It is read by SamplerThread::Run(). + // + // There are two cases where racing between threads can cause an issue. + // + // - If CanDuplicateLastSampleDueToSleep() returns false but that result is + // invalidated before being acted upon, we will take a full sample + // unnecessarily. This is additional work but won't cause any correctness + // issues. (In actual fact, this case is impossible. In order to go from + // CanDuplicateLastSampleDueToSleep() returning false to it returning true + // requires an intermediate call to it in order for mSleep to go from + // SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.) + // + // - If CanDuplicateLastSampleDueToSleep() returns true but that result is + // invalidated before being acted upon -- i.e. the thread wakes up before + // DuplicateLastSample() is called -- we will duplicate the previous + // sample. This is inaccurate, but only slightly... we will effectively + // treat the thread as having slept a tiny bit longer than it really did. + // + // This latter inaccuracy could be avoided by moving the + // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code, + // e.g. the section where Tick() is called. But that would reduce the + // effectiveness of the optimization because more code would have to be run + // before we can tell that duplication is allowed. 
+ // + static const int AWAKE = 0; + static const int SLEEPING_NOT_OBSERVED = 1; + static const int SLEEPING_OBSERVED = 2; + // Read&written from thread and suspended thread. + Atomic mSleep{AWAKE}; + Atomic mThreadCpuTimeInNsAtLastSleep{0}; + +#ifdef NIGHTLY_BUILD + // The first wake is the thread creation. + Atomic mWakeCount{1}; + mutable baseprofiler::detail::BaseProfilerMutex mRecordWakeCountMutex; + mutable uint64_t mAlreadyRecordedWakeCount = 0; + mutable uint64_t mAlreadyRecordedCpuTimeInMs = 0; +#endif + + // Is this thread currently being profiled, and with which features? + // Written from profiler, read from any thread. + // Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together.) + Atomic mProfilingFeatures{ + ThreadProfilingFeatures::NotProfiled}; + + // If the profiler is active and this thread is selected for profiling, this + // points at the relevant ProfiledThreadData. + // Fully controlled by the profiler. + // Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together). + ProfiledThreadData* mProfiledThreadData = nullptr; +}; + +// Accessing const data from any thread. +class ThreadRegistrationUnlockedConstReader : public ThreadRegistrationData { + public: + [[nodiscard]] const ThreadRegistrationInfo& Info() const { return mInfo; } + + [[nodiscard]] const PlatformData& PlatformDataCRef() const { + return mPlatformData; + } + + [[nodiscard]] const void* StackTop() const { return mStackTop; } + + protected: + ThreadRegistrationUnlockedConstReader(const char* aName, + const void* aStackTop) + : ThreadRegistrationData(aName, aStackTop) {} +}; + +// Accessing atomic data from any thread. 
+class ThreadRegistrationUnlockedConstReaderAndAtomicRW + : public ThreadRegistrationUnlockedConstReader { + public: + [[nodiscard]] const ProfilingStack& ProfilingStackCRef() const { + return mProfilingStack; + } + [[nodiscard]] ProfilingStack& ProfilingStackRef() { return mProfilingStack; } + + // Similar to `profiler_is_active()`, this atomic flag may become out-of-date. + // It should only be used as an indication to know whether this thread is + // probably being profiled (with some specific features), to avoid doing + // expensive operations otherwise. Edge cases: + // - This thread could get `NotProfiled`, but the profiler has just started, + // so some very early data may be missing. No real impact on profiling. + // - This thread could see profiled features, but the profiler has just + // stopped, so some work will be done and then discarded when finally + // attempting to write to the buffer. No impact on profiling. + // - This thread could see profiled features, but the profiler will quickly + // stop and restart, so this thread will write information relevant to the + // previous profiling session. Very rare, and little impact on profiling. + [[nodiscard]] ThreadProfilingFeatures ProfilingFeatures() const { + return mProfilingFeatures; + } + + // Call this whenever the current thread sleeps. Calling it twice in a row + // without an intervening SetAwake() call is an error. + void SetSleeping() { + MOZ_ASSERT(mSleep == AWAKE); + mSleep = SLEEPING_NOT_OBSERVED; + } + + // Call this whenever the current thread wakes. Calling it twice in a row + // without an intervening SetSleeping() call is an error. + void SetAwake() { + MOZ_ASSERT(mSleep != AWAKE); + mSleep = AWAKE; +#ifdef NIGHTLY_BUILD + ++mWakeCount; +#endif + } + + // Returns the CPU time used by the thread since the previous call to this + // method or since the thread was started if this is the first call. 
+ uint64_t GetNewCpuTimeInNs() { + uint64_t newCpuTimeNs; + if (!GetCpuTimeSinceThreadStartInNs(&newCpuTimeNs, PlatformDataCRef())) { + newCpuTimeNs = 0; + } + uint64_t before = mThreadCpuTimeInNsAtLastSleep; + uint64_t result = + MOZ_LIKELY(newCpuTimeNs > before) ? newCpuTimeNs - before : 0; + mThreadCpuTimeInNsAtLastSleep = newCpuTimeNs; + return result; + } + +#ifdef NIGHTLY_BUILD + void RecordWakeCount() const; +#endif + + // This is called on every profiler restart. Put things that should happen + // at that time here. + void ReinitializeOnResume() { + // This is needed to cause an initial sample to be taken from sleeping + // threads that had been observed prior to the profiler stopping and + // restarting. Otherwise sleeping threads would not have any samples to + // copy forward while sleeping. + (void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED); + } + + // This returns true for the second and subsequent calls in each sleep + // cycle, so that the sampler can skip its full sampling and reuse the first + // asleep sample instead. + [[nodiscard]] bool CanDuplicateLastSampleDueToSleep() { + if (mSleep == AWAKE) { + return false; + } + if (mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED)) { + return false; + } + return true; + } + + [[nodiscard]] bool IsSleeping() const { return mSleep != AWAKE; } + + protected: + ThreadRegistrationUnlockedConstReaderAndAtomicRW(const char* aName, + const void* aStackTop) + : ThreadRegistrationUnlockedConstReader(aName, aStackTop) {} +}; + +// Like above, with special PSAutoLock-guarded accessors. +class ThreadRegistrationUnlockedRWForLockedProfiler + : public ThreadRegistrationUnlockedConstReaderAndAtomicRW { + public: + // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! + // Only add functions that take a `const PSAutoLock&` proof-of-lock. + // (Because there is no other lock.) 
+ + [[nodiscard]] const ProfiledThreadData* GetProfiledThreadData( + const PSAutoLock&) const { + return mProfiledThreadData; + } + + [[nodiscard]] ProfiledThreadData* GetProfiledThreadData(const PSAutoLock&) { + return mProfiledThreadData; + } + + protected: + ThreadRegistrationUnlockedRWForLockedProfiler(const char* aName, + const void* aStackTop) + : ThreadRegistrationUnlockedConstReaderAndAtomicRW(aName, aStackTop) {} +}; + +// Reading data, unlocked from the thread, or locked otherwise. +// This data MUST only be written from the thread with lock (i.e., in +// LockedRWOnThread through RWOnThreadWithLock.) +class ThreadRegistrationUnlockedReaderAndAtomicRWOnThread + : public ThreadRegistrationUnlockedRWForLockedProfiler { + public: + // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! + // Non-atomic members read here MUST be written from LockedRWOnThread (to + // guarantee that they are only modified on this thread.) + + [[nodiscard]] JSContext* GetJSContext() const { return mJSContext; } + + protected: + ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName, + const void* aStackTop) + : ThreadRegistrationUnlockedRWForLockedProfiler(aName, aStackTop) {} +}; + +// Accessing locked data from the thread, or from any thread through the locked +// profiler: + +// Like above, and profiler can also read&write mutex-protected members. +class ThreadRegistrationLockedRWFromAnyThread + : public ThreadRegistrationUnlockedReaderAndAtomicRWOnThread { + public: + void SetProfilingFeaturesAndData(ThreadProfilingFeatures aProfilingFeatures, + ProfiledThreadData* aProfiledThreadData, + const PSAutoLock&); + void ClearProfilingFeaturesAndData(const PSAutoLock&); + + // Not null when JSContext is not null AND this thread is being profiled. + // Points at the start of JsFrameBuffer. 
+ [[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; } + + [[nodiscard]] const nsCOMPtr GetEventTarget() const { + return mThread; + } + + void ResetMainThread(nsIThread* aThread) { mThread = aThread; } + + // aDelay is the time the event that is currently running on the thread was + // queued before starting to run (if a PrioritizedEventQueue + // (i.e. MainThread), this will be 0 for any event at a lower priority + // than Input). + // aRunning is the time the event has been running. If no event is running + // these will both be TimeDuration() (i.e. 0). Both are out params, and are + // always set. Their initial value is discarded. + void GetRunningEventDelay(const TimeStamp& aNow, TimeDuration& aDelay, + TimeDuration& aRunning) { + if (mThread) { // can be null right at the start of a process + TimeStamp start; + mThread->GetRunningEventDelay(&aDelay, &start); + if (!start.IsNull()) { + // Note: the timestamp used here will be from when we started to + // suspend and sample the thread; which is also the timestamp + // associated with the sample. + aRunning = aNow - start; + return; + } + } + aDelay = TimeDuration(); + aRunning = TimeDuration(); + } + + // Request that this thread start JS sampling. JS sampling won't actually + // start until a subsequent PollJSSampling() call occurs *and* mJSContext has + // been set. + void StartJSSampling(uint32_t aJSFlags) { + // This function runs on-thread or off-thread. + + MOZ_RELEASE_ASSERT(mJSSampling == INACTIVE || + mJSSampling == INACTIVE_REQUESTED); + mJSSampling = ACTIVE_REQUESTED; + mJSFlags = aJSFlags; + } + + // Request that this thread stop JS sampling. JS sampling won't actually + // stop until a subsequent PollJSSampling() call occurs. + void StopJSSampling() { + // This function runs on-thread or off-thread. 
+ + MOZ_RELEASE_ASSERT(mJSSampling == ACTIVE || + mJSSampling == ACTIVE_REQUESTED); + mJSSampling = INACTIVE_REQUESTED; + } + + protected: + ThreadRegistrationLockedRWFromAnyThread(const char* aName, + const void* aStackTop) + : ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(aName, aStackTop) {} +}; + +// Accessing data, locked, from the thread. +// If any non-atomic data is readable from UnlockedReaderAndAtomicRWOnThread, +// it must be written from here, and not in base classes: Since this data is +// only written on the thread, it can be read from the same thread without +// lock; but writing must be locked so that other threads can safely read it, +// typically from LockedRWFromAnyThread. +class ThreadRegistrationLockedRWOnThread + : public ThreadRegistrationLockedRWFromAnyThread { + public: + void SetJSContext(JSContext* aJSContext); + void ClearJSContext(); + + // Poll to see if JS sampling should be started/stopped. + void PollJSSampling(); + + public: + ThreadRegistrationLockedRWOnThread(const char* aName, const void* aStackTop) + : ThreadRegistrationLockedRWFromAnyThread(aName, aStackTop) {} +}; + +} // namespace mozilla::profiler + +#endif // ProfilerThreadRegistrationData_h diff --git a/tools/profiler/public/ProfilerThreadRegistrationInfo.h b/tools/profiler/public/ProfilerThreadRegistrationInfo.h new file mode 100644 index 0000000000..e116c3059e --- /dev/null +++ b/tools/profiler/public/ProfilerThreadRegistrationInfo.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfilerThreadRegistrationInfo_h +#define ProfilerThreadRegistrationInfo_h + +#include "mozilla/BaseAndGeckoProfilerDetail.h" +#include "mozilla/ProfilerUtils.h" +#include "mozilla/TimeStamp.h" + +#include <string> + +namespace mozilla::profiler { + +// This class contains immutable information about a thread which needs to be +// stored across restarts of the profiler and which can be useful even after the +// thread has stopped running. +class ThreadRegistrationInfo { + public: + // Construct on the thread. + explicit ThreadRegistrationInfo(const char* aName) : mName(aName) {} + + // Construct for a foreign thread (e.g., Java). + ThreadRegistrationInfo(const char* aName, ProfilerThreadId aThreadId, + bool aIsMainThread, const TimeStamp& aRegisterTime) + : mName(aName), + mRegisterTime(aRegisterTime), + mThreadId(aThreadId), + mIsMainThread(aIsMainThread) {} + + // Only allow move construction, for extraction when the thread ends. + ThreadRegistrationInfo(ThreadRegistrationInfo&&) = default; + + // Other copies/moves disallowed. 
+ ThreadRegistrationInfo(const ThreadRegistrationInfo&) = delete; + ThreadRegistrationInfo& operator=(const ThreadRegistrationInfo&) = delete; + ThreadRegistrationInfo& operator=(ThreadRegistrationInfo&&) = delete; + + [[nodiscard]] const char* Name() const { return mName.c_str(); } + [[nodiscard]] const TimeStamp& RegisterTime() const { return mRegisterTime; } + [[nodiscard]] ProfilerThreadId ThreadId() const { return mThreadId; } + [[nodiscard]] bool IsMainThread() const { return mIsMainThread; } + + private: + static TimeStamp ExistingRegisterTimeOrNow() { + TimeStamp registerTime = baseprofiler::detail::GetThreadRegistrationTime(); + if (!registerTime) { + registerTime = TimeStamp::Now(); + } + return registerTime; + } + + const std::string mName; + const TimeStamp mRegisterTime = ExistingRegisterTimeOrNow(); + const ProfilerThreadId mThreadId = profiler_current_thread_id(); + const bool mIsMainThread = profiler_is_main_thread(); +}; + +} // namespace mozilla::profiler + +#endif // ProfilerThreadRegistrationInfo_h diff --git a/tools/profiler/public/ProfilerThreadRegistry.h b/tools/profiler/public/ProfilerThreadRegistry.h new file mode 100644 index 0000000000..4d0fd3ef68 --- /dev/null +++ b/tools/profiler/public/ProfilerThreadRegistry.h @@ -0,0 +1,321 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfilerThreadRegistry_h +#define ProfilerThreadRegistry_h + +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/ProfilerThreadRegistration.h" +#include "mozilla/Vector.h" + +namespace mozilla::profiler { + +class ThreadRegistry { + private: + using RegistryMutex = baseprofiler::detail::BaseProfilerSharedMutex; + using RegistryLockExclusive = + baseprofiler::detail::BaseProfilerAutoLockExclusive; + using RegistryLockShared = baseprofiler::detail::BaseProfilerAutoLockShared; + + public: + // Aliases to data accessors (removing the ThreadRegistration prefix). + + using UnlockedConstReader = ThreadRegistrationUnlockedConstReader; + using UnlockedConstReaderAndAtomicRW = + ThreadRegistrationUnlockedConstReaderAndAtomicRW; + using UnlockedRWForLockedProfiler = + ThreadRegistrationUnlockedRWForLockedProfiler; + using UnlockedReaderAndAtomicRWOnThread = + ThreadRegistrationUnlockedReaderAndAtomicRWOnThread; + using LockedRWFromAnyThread = ThreadRegistrationLockedRWFromAnyThread; + using LockedRWOnThread = ThreadRegistrationLockedRWOnThread; + + // Off-thread access through the registry, providing the following data + // accessors: UnlockedConstReader, UnlockedConstReaderAndAtomicRW, + // UnlockedRWForLockedProfiler, and LockedRWFromAnyThread. + // (See ThreadRegistration class for ON-thread access.) + + // Reference-like class pointing at a ThreadRegistration. + // It should only exist while sRegistryMutex is locked. 
+ class OffThreadRef { + public: + // const UnlockedConstReader + + [[nodiscard]] const UnlockedConstReader& UnlockedConstReaderCRef() const { + return mThreadRegistration->mData; + } + + template <typename F> + auto WithUnlockedConstReader(F&& aF) const { + return std::forward<F>(aF)(UnlockedConstReaderCRef()); + } + + // const UnlockedConstReaderAndAtomicRW + + [[nodiscard]] const UnlockedConstReaderAndAtomicRW& + UnlockedConstReaderAndAtomicRWCRef() const { + return mThreadRegistration->mData; + } + + template <typename F> + auto WithUnlockedConstReaderAndAtomicRW(F&& aF) const { + return std::forward<F>(aF)(UnlockedConstReaderAndAtomicRWCRef()); + } + + // UnlockedConstReaderAndAtomicRW + + [[nodiscard]] UnlockedConstReaderAndAtomicRW& + UnlockedConstReaderAndAtomicRWRef() { + return mThreadRegistration->mData; + } + + template <typename F> + auto WithUnlockedConstReaderAndAtomicRW(F&& aF) { + return std::forward<F>(aF)(UnlockedConstReaderAndAtomicRWRef()); + } + + // const UnlockedRWForLockedProfiler + + [[nodiscard]] const UnlockedRWForLockedProfiler& + UnlockedRWForLockedProfilerCRef() const { + return mThreadRegistration->mData; + } + + template <typename F> + auto WithUnlockedRWForLockedProfiler(F&& aF) const { + return std::forward<F>(aF)(UnlockedRWForLockedProfilerCRef()); + } + + // UnlockedRWForLockedProfiler + + [[nodiscard]] UnlockedRWForLockedProfiler& + UnlockedRWForLockedProfilerRef() { + return mThreadRegistration->mData; + } + + template <typename F> + auto WithUnlockedRWForLockedProfiler(F&& aF) { + return std::forward<F>(aF)(UnlockedRWForLockedProfilerRef()); + } + + // const LockedRWFromAnyThread through ConstRWFromAnyThreadWithLock + + class ConstRWFromAnyThreadWithLock { + public: + [[nodiscard]] const LockedRWFromAnyThread& DataCRef() const { + return mLockedRWFromAnyThread; + } + [[nodiscard]] const LockedRWFromAnyThread* operator->() const { + return &mLockedRWFromAnyThread; + } + + ConstRWFromAnyThreadWithLock( + const 
LockedRWFromAnyThread& aLockedRWFromAnyThread, + ThreadRegistration::DataMutex& aDataMutex) + : 
mLockedRWFromAnyThread(aLockedRWFromAnyThread), + mDataLock(aDataMutex) {} + + private: + const LockedRWFromAnyThread& mLockedRWFromAnyThread; + ThreadRegistration::DataLock mDataLock; + }; + + [[nodiscard]] ConstRWFromAnyThreadWithLock ConstLockedRWFromAnyThread() + const { + return ConstRWFromAnyThreadWithLock{mThreadRegistration->mData, + mThreadRegistration->mDataMutex}; + } + + template <typename F> + auto WithConstLockedRWFromAnyThread(F&& aF) const { + ConstRWFromAnyThreadWithLock lockedData = ConstLockedRWFromAnyThread(); + return std::forward<F>(aF)(lockedData.DataCRef()); + } + + // LockedRWFromAnyThread through RWFromAnyThreadWithLock + + class RWFromAnyThreadWithLock { + public: + [[nodiscard]] const LockedRWFromAnyThread& DataCRef() const { + return mLockedRWFromAnyThread; + } + [[nodiscard]] LockedRWFromAnyThread& DataRef() { + return mLockedRWFromAnyThread; + } + [[nodiscard]] const LockedRWFromAnyThread* operator->() const { + return &mLockedRWFromAnyThread; + } + [[nodiscard]] LockedRWFromAnyThread* operator->() { + return &mLockedRWFromAnyThread; + } + + // In some situations, it may be useful to do some on-thread operations if + // we are indeed on this thread now. The lock is still held here; caller + // should not use this pointer longer than this RWFromAnyThreadWithLock. + [[nodiscard]] LockedRWOnThread* GetLockedRWOnThread() { + if (mLockedRWFromAnyThread.Info().ThreadId() == + profiler_current_thread_id()) { + // mLockedRWFromAnyThread references a subclass of the + // ThreadRegistration's mData, so it's safe to downcast it to another + // hierarchy level of the object. 
+ return &static_cast<LockedRWOnThread&>(mLockedRWFromAnyThread); + } + return nullptr; + } + + private: + friend class OffThreadRef; + RWFromAnyThreadWithLock(LockedRWFromAnyThread& aLockedRWFromAnyThread, + ThreadRegistration::DataMutex& aDataMutex) + : mLockedRWFromAnyThread(aLockedRWFromAnyThread), + mDataLock(aDataMutex) {} + + LockedRWFromAnyThread& mLockedRWFromAnyThread; + ThreadRegistration::DataLock mDataLock; + }; + + [[nodiscard]] RWFromAnyThreadWithLock GetLockedRWFromAnyThread() { + return RWFromAnyThreadWithLock{mThreadRegistration->mData, + mThreadRegistration->mDataMutex}; + } + + template <typename F> + auto WithLockedRWFromAnyThread(F&& aF) { + RWFromAnyThreadWithLock lockedData = GetLockedRWFromAnyThread(); + return std::forward<F>(aF)(lockedData.DataRef()); + } + + private: + // Only ThreadRegistry should construct an OffThreadRef. + friend class ThreadRegistry; + explicit OffThreadRef(ThreadRegistration& aThreadRegistration) + : mThreadRegistration(&aThreadRegistration) {} + + // If we have an ON-thread ref, it's safe to convert to an OFF-thread ref. + explicit OffThreadRef(ThreadRegistration::OnThreadRef aOnThreadRef) + : mThreadRegistration(aOnThreadRef.mThreadRegistration) {} + + [[nodiscard]] bool IsPointingAt( + ThreadRegistration& aThreadRegistration) const { + return mThreadRegistration == &aThreadRegistration; + } + + // Guaranteed to be non-null by construction. + ThreadRegistration* mThreadRegistration; + }; + + // Lock the registry non-exclusively and allow iteration. E.g.: + // `for (OffThreadRef thread : LockedRegistry{}) { ... }` + // Do *not* export copies/references, as they could become dangling. + // Locking order: Profiler, ThreadRegistry, ThreadRegistration. 
+ class LockedRegistry { + public: + LockedRegistry() + : mRegistryLock([]() -> RegistryMutex& { + MOZ_ASSERT(!IsRegistryMutexLockedOnCurrentThread(), + "Recursive locking detected"); + // In DEBUG builds, *before* we attempt to lock sRegistryMutex, we + // want to check that the ThreadRegistration mutex is *not* locked + // on this thread, to avoid inversion deadlocks. + MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread()); + return sRegistryMutex; + }()) { + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.mThreadRegistration + ->mIsRegistryLockedSharedOnThisThread = true; + }); + } + + ~LockedRegistry() { + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.mThreadRegistration + ->mIsRegistryLockedSharedOnThisThread = false; + }); + } + + [[nodiscard]] const OffThreadRef* begin() const { + return sRegistryContainer.begin(); + } + [[nodiscard]] OffThreadRef* begin() { return sRegistryContainer.begin(); } + [[nodiscard]] const OffThreadRef* end() const { + return sRegistryContainer.end(); + } + [[nodiscard]] OffThreadRef* end() { return sRegistryContainer.end(); } + + private: + RegistryLockShared mRegistryLock; + }; + + // Call `F(OffThreadRef)` for the given aThreadId. 
+ template <typename F> + static void WithOffThreadRef(ProfilerThreadId aThreadId, F&& aF) { + for (OffThreadRef thread : LockedRegistry{}) { + if (thread.UnlockedConstReaderCRef().Info().ThreadId() == aThreadId) { + std::forward<F>(aF)(thread); + break; + } + } + } + + template <typename F, typename FallbackReturn> + [[nodiscard]] static auto WithOffThreadRefOr(ProfilerThreadId aThreadId, + F&& aF, + FallbackReturn&& aFallbackReturn) + -> decltype(std::forward<F>(aF)(std::declval<OffThreadRef>())) { + for (OffThreadRef thread : LockedRegistry{}) { + if (thread.UnlockedConstReaderCRef().Info().ThreadId() == aThreadId) { + return std::forward<F>(aF)(thread); + } + } + return std::forward<FallbackReturn>(aFallbackReturn); + } + + static size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) { + LockedRegistry lockedRegistry; + // "Ex" because we don't count static objects, but we count whatever they + // allocated on the heap. + size_t bytes = sRegistryContainer.sizeOfExcludingThis(aMallocSizeOf); + for (const OffThreadRef& offThreadRef : lockedRegistry) { + bytes += + offThreadRef.mThreadRegistration->SizeOfExcludingThis(aMallocSizeOf); + } + return bytes; + } + + static size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) { + return SizeOfExcludingThis(aMallocSizeOf); + } + + [[nodiscard]] static bool IsRegistryMutexLockedOnCurrentThread() { + return sRegistryMutex.IsLockedExclusiveOnCurrentThread() || + ThreadRegistration::WithOnThreadRefOr( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + return aOnThreadRef.mThreadRegistration + ->mIsRegistryLockedSharedOnThisThread; + }, + false); + } + + private: + using RegistryContainer = Vector<OffThreadRef>; + + static RegistryContainer sRegistryContainer; + + // Mutex protecting the registry. + // Locking order: Profiler, ThreadRegistry, ThreadRegistration. + static RegistryMutex sRegistryMutex; + + // Only allow ThreadRegistration to (un)register itself. 
+ friend class ThreadRegistration; + static void Register(ThreadRegistration::OnThreadRef aOnThreadRef); + static void Unregister(ThreadRegistration::OnThreadRef aOnThreadRef); +}; + +} // namespace mozilla::profiler + +#endif // ProfilerThreadRegistry_h diff --git a/tools/profiler/public/ProfilerThreadSleep.h b/tools/profiler/public/ProfilerThreadSleep.h new file mode 100644 index 0000000000..730176d39f --- /dev/null +++ b/tools/profiler/public/ProfilerThreadSleep.h @@ -0,0 +1,58 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// APIs that inform the profiler when a thread is effectively asleep so that we +// can avoid sampling it more than once. + +#ifndef ProfilerThreadSleep_h +#define ProfilerThreadSleep_h + +#ifndef MOZ_GECKO_PROFILER + +// This file can be #included unconditionally. However, everything within this +// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the +// following macros and functions, which encapsulate the most common operations +// and thus avoid the need for many #ifdefs. + +# define AUTO_PROFILER_THREAD_SLEEP + +static inline void profiler_thread_sleep() {} + +static inline void profiler_thread_wake() {} + +#else // !MOZ_GECKO_PROFILER + +# include "mozilla/Attributes.h" +# include "mozilla/BaseProfilerRAIIMacro.h" + +// These functions tell the profiler that a thread went to sleep so that we can +// avoid sampling it more than once while it's sleeping. Calling +// profiler_thread_sleep() twice without an intervening profiler_thread_wake() +// is an error. All three functions operate the same whether the profiler is +// active or inactive. 
+void profiler_thread_sleep(); +void profiler_thread_wake(); + +// Mark a thread as asleep within a scope. +// (See also AUTO_PROFILER_THREAD_WAKE in ProfilerThreadState.h) +# define AUTO_PROFILER_THREAD_SLEEP \ + mozilla::AutoProfilerThreadSleep PROFILER_RAII + +namespace mozilla { + +// (See also AutoProfilerThreadWake in ProfilerThreadState.h) +class MOZ_RAII AutoProfilerThreadSleep { + public: + explicit AutoProfilerThreadSleep() { profiler_thread_sleep(); } + + ~AutoProfilerThreadSleep() { profiler_thread_wake(); } +}; + +} // namespace mozilla + +#endif // !MOZ_GECKO_PROFILER + +#endif // ProfilerThreadSleep_h diff --git a/tools/profiler/public/ProfilerThreadState.h b/tools/profiler/public/ProfilerThreadState.h new file mode 100644 index 0000000000..6ac48e41dd --- /dev/null +++ b/tools/profiler/public/ProfilerThreadState.h @@ -0,0 +1,128 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This header contains functions that give information about the Profiler state +// with regards to the current thread. + +#ifndef ProfilerThreadState_h +#define ProfilerThreadState_h + +#include "mozilla/ProfilerState.h" +#include "mozilla/ProfilerThreadRegistration.h" +#include "mozilla/ProfilerThreadRegistry.h" +#include "mozilla/ProfilerThreadSleep.h" + +// During profiling, if the current thread is registered, return true +// (regardless of whether it is actively being profiled). +// (Same caveats and recommended usage as profiler_is_active().) 
+[[nodiscard]] inline bool profiler_is_active_and_thread_is_registered() { + return profiler_is_active() && + mozilla::profiler::ThreadRegistration::IsRegistered(); +} + +// Is the profiler active and unpaused, and is the current thread being +// profiled for any of the given features? (Same caveats and recommended usage +// as profiler_is_active().) +[[nodiscard]] inline bool profiler_thread_is_being_profiled( + ThreadProfilingFeatures aThreadProfilingFeatures) { + return profiler_is_active_and_unpaused() && + mozilla::profiler::ThreadRegistration::WithOnThreadRefOr( + [aThreadProfilingFeatures]( + mozilla::profiler::ThreadRegistration::OnThreadRef aTR) { + return DoFeaturesIntersect( + aTR.UnlockedConstReaderAndAtomicRWCRef().ProfilingFeatures(), + aThreadProfilingFeatures); + }, + false); +} + +// Is the profiler active and unpaused, and is the given thread being profiled? +// (Same caveats and recommended usage as profiler_is_active().) +// Safe to use with the current thread id, or unspecified ProfilerThreadId (same +// as current thread id). +[[nodiscard]] inline bool profiler_thread_is_being_profiled( + const ProfilerThreadId& aThreadId, + ThreadProfilingFeatures aThreadProfilingFeatures) { + if (!profiler_is_active_and_unpaused()) { + return false; + } + + if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) { + // For the current thread id, use the ThreadRegistration directly, it is + // more efficient. + return mozilla::profiler::ThreadRegistration::WithOnThreadRefOr( + [aThreadProfilingFeatures]( + mozilla::profiler::ThreadRegistration::OnThreadRef aTR) { + return DoFeaturesIntersect( + aTR.UnlockedConstReaderAndAtomicRWCRef().ProfilingFeatures(), + aThreadProfilingFeatures); + }, + false); + } + + // For other threads, go through the ThreadRegistry. 
+ return mozilla::profiler::ThreadRegistry::WithOffThreadRefOr( + aThreadId, + [aThreadProfilingFeatures]( + mozilla::profiler::ThreadRegistry::OffThreadRef aTR) { + return DoFeaturesIntersect( + aTR.UnlockedConstReaderAndAtomicRWCRef().ProfilingFeatures(), + aThreadProfilingFeatures); + }, + false); +} + +// Is the current thread registered and sleeping? +[[nodiscard]] inline bool profiler_thread_is_sleeping() { + return profiler_is_active() && + mozilla::profiler::ThreadRegistration::WithOnThreadRefOr( + [](mozilla::profiler::ThreadRegistration::OnThreadRef aTR) { + return aTR.UnlockedConstReaderAndAtomicRWCRef().IsSleeping(); + }, + false); +} + +#ifndef MOZ_GECKO_PROFILER + +# define AUTO_PROFILER_THREAD_WAKE + +#else // !MOZ_GECKO_PROFILER + +// Mark a thread as awake within a scope. +// (See also AUTO_PROFILER_THREAD_SLEEP in mozilla/ProfilerThreadSleep.h) +# define AUTO_PROFILER_THREAD_WAKE \ + mozilla::AutoProfilerThreadWake PROFILER_RAII + +namespace mozilla { + +// Temporarily wake up the profiling of a thread while servicing events such as +// Asynchronous Procedure Calls (APCs). 
+// (See also AutoProfilerThreadSleep in ProfilerThreadSleep.h) +class MOZ_RAII AutoProfilerThreadWake { + public: + explicit AutoProfilerThreadWake() + : mIssuedWake(profiler_thread_is_sleeping()) { + if (mIssuedWake) { + profiler_thread_wake(); + } + } + + ~AutoProfilerThreadWake() { + if (mIssuedWake) { + MOZ_ASSERT(!profiler_thread_is_sleeping()); + profiler_thread_sleep(); + } + } + + private: + bool mIssuedWake; +}; + +} // namespace mozilla + +#endif // !MOZ_GECKO_PROFILER + +#endif // ProfilerThreadState_h diff --git a/tools/profiler/public/ProfilerUtils.h b/tools/profiler/public/ProfilerUtils.h new file mode 100644 index 0000000000..3969761e18 --- /dev/null +++ b/tools/profiler/public/ProfilerUtils.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfilerUtils_h +#define ProfilerUtils_h + +// This header contains most process- and thread-related functions. +// It is safe to include unconditionally. + +#include "mozilla/BaseProfilerUtils.h" + +using ProfilerProcessId = mozilla::baseprofiler::BaseProfilerProcessId; +using ProfilerThreadId = mozilla::baseprofiler::BaseProfilerThreadId; + +// Get the current process's ID. +[[nodiscard]] ProfilerProcessId profiler_current_process_id(); + +// Get the current thread's ID. +[[nodiscard]] ProfilerThreadId profiler_current_thread_id(); + +// Must be called at least once from the main thread, before any other main- +// thread id function. 
+void profiler_init_main_thread_id(); + +[[nodiscard]] ProfilerThreadId profiler_main_thread_id(); + +[[nodiscard]] bool profiler_is_main_thread(); + +#endif // ProfilerUtils_h diff --git a/tools/profiler/public/shared-libraries.h b/tools/profiler/public/shared-libraries.h new file mode 100644 index 0000000000..dfd3599e71 --- /dev/null +++ b/tools/profiler/public/shared-libraries.h @@ -0,0 +1,213 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef SHARED_LIBRARIES_H_ +#define SHARED_LIBRARIES_H_ + +#ifndef MOZ_GECKO_PROFILER +# error This header does not have a useful implementation on your platform! +#endif + +#include "nsNativeCharsetUtils.h" +#include "nsString.h" +#include + +#include +#include +#include +#include +#include + +namespace IPC { +class MessageReader; +class MessageWriter; +template +struct ParamTraits; +} // namespace IPC + +class SharedLibrary { + public: + SharedLibrary(uintptr_t aStart, uintptr_t aEnd, uintptr_t aOffset, + const nsCString& aBreakpadId, const nsCString& aCodeId, + const nsString& aModuleName, const nsString& aModulePath, + const nsString& aDebugName, const nsString& aDebugPath, + const nsCString& aVersion, const char* aArch) + : mStart(aStart), + mEnd(aEnd), + mOffset(aOffset), + mBreakpadId(aBreakpadId), + mCodeId(aCodeId), + mModuleName(aModuleName), + mModulePath(aModulePath), + mDebugName(aDebugName), + mDebugPath(aDebugPath), + mVersion(aVersion), + mArch(aArch) {} + + bool operator==(const SharedLibrary& other) const { + return (mStart == other.mStart) && (mEnd == other.mEnd) && + (mOffset == other.mOffset) && (mModuleName == other.mModuleName) && + (mModulePath == other.mModulePath) && + (mDebugName == other.mDebugName) && + 
(mDebugPath == other.mDebugPath) && + (mBreakpadId == other.mBreakpadId) && (mCodeId == other.mCodeId) && + (mVersion == other.mVersion) && (mArch == other.mArch); + } + + uintptr_t GetStart() const { return mStart; } + uintptr_t GetEnd() const { return mEnd; } + uintptr_t GetOffset() const { return mOffset; } + const nsCString& GetBreakpadId() const { return mBreakpadId; } + const nsCString& GetCodeId() const { return mCodeId; } + const nsString& GetModuleName() const { return mModuleName; } + const nsString& GetModulePath() const { return mModulePath; } + const std::string GetNativeDebugPath() const { + nsAutoCString debugPathStr; + + NS_CopyUnicodeToNative(mDebugPath, debugPathStr); + + return debugPathStr.get(); + } + const nsString& GetDebugName() const { return mDebugName; } + const nsString& GetDebugPath() const { return mDebugPath; } + const nsCString& GetVersion() const { return mVersion; } + const std::string& GetArch() const { return mArch; } + size_t SizeOf() const { + return sizeof *this + mBreakpadId.Length() + mCodeId.Length() + + mModuleName.Length() * 2 + mModulePath.Length() * 2 + + mDebugName.Length() * 2 + mDebugPath.Length() * 2 + + mVersion.Length() + mArch.size(); + } + + SharedLibrary() : mStart{0}, mEnd{0}, mOffset{0} {} + + private: + uintptr_t mStart; + uintptr_t mEnd; + uintptr_t mOffset; + nsCString mBreakpadId; + // A string carrying an identifier for a binary. + // + // All platforms have different formats: + // - Windows: The code ID for a Windows PE file. + // It's the PE timestamp and PE image size. + // - macOS: The code ID for a macOS / iOS binary (mach-O). + // It's the mach-O UUID without dashes and without the trailing 0 for the + // breakpad ID. + // - Linux/Android: The code ID for a Linux ELF file. + // It's the complete build ID, as hex string. 
+ nsCString mCodeId; + nsString mModuleName; + nsString mModulePath; + nsString mDebugName; + nsString mDebugPath; + nsCString mVersion; + std::string mArch; + + friend struct IPC::ParamTraits; +}; + +static bool CompareAddresses(const SharedLibrary& first, + const SharedLibrary& second) { + return first.GetStart() < second.GetStart(); +} + +class SharedLibraryInfo { + public: + static SharedLibraryInfo GetInfoForSelf(); +#ifdef XP_WIN + static SharedLibraryInfo GetInfoFromPath(const wchar_t* aPath); +#endif + + static void Initialize(); + + void AddSharedLibrary(SharedLibrary entry) { mEntries.push_back(entry); } + + void AddAllSharedLibraries(const SharedLibraryInfo& sharedLibraryInfo) { + mEntries.insert(mEntries.end(), sharedLibraryInfo.mEntries.begin(), + sharedLibraryInfo.mEntries.end()); + } + + const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; } + + SharedLibrary& GetMutableEntry(size_t i) { return mEntries[i]; } + + // Removes items in the range [first, last) + // i.e. element at the "last" index is not removed + void RemoveEntries(size_t first, size_t last) { + mEntries.erase(mEntries.begin() + first, mEntries.begin() + last); + } + + bool Contains(const SharedLibrary& searchItem) const { + return (mEntries.end() != + std::find(mEntries.begin(), mEntries.end(), searchItem)); + } + + size_t GetSize() const { return mEntries.size(); } + + void SortByAddress() { + std::sort(mEntries.begin(), mEntries.end(), CompareAddresses); + } + + // Remove duplicate entries from the vector. + // + // We purposefully don't use the operator== implementation of SharedLibrary + // because it compares all the fields including mStart, mEnd and mOffset which + // are not the same across different processes. 
+ void DeduplicateEntries() { + static auto cmpSort = [](const SharedLibrary& a, const SharedLibrary& b) { + return std::tie(a.GetModuleName(), a.GetBreakpadId()) < + std::tie(b.GetModuleName(), b.GetBreakpadId()); + }; + static auto cmpEqual = [](const SharedLibrary& a, const SharedLibrary& b) { + return std::tie(a.GetModuleName(), a.GetBreakpadId()) == + std::tie(b.GetModuleName(), b.GetBreakpadId()); + }; + // std::unique requires the vector to be sorted first. It can only remove + // consecutive duplicate elements. + std::sort(mEntries.begin(), mEntries.end(), cmpSort); + // Remove the duplicates since it's sorted now. + mEntries.erase(std::unique(mEntries.begin(), mEntries.end(), cmpEqual), + mEntries.end()); + } + + void Clear() { mEntries.clear(); } + + size_t SizeOf() const { + size_t size = 0; + + for (const auto& item : mEntries) { + size += item.SizeOf(); + } + + return size; + } + + private: + std::vector mEntries; + + friend struct IPC::ParamTraits; +}; + +namespace IPC { +template <> +struct ParamTraits { + typedef SharedLibrary paramType; + + static void Write(MessageWriter* aWriter, const paramType& aParam); + static bool Read(MessageReader* aReader, paramType* aResult); +}; + +template <> +struct ParamTraits { + typedef SharedLibraryInfo paramType; + + static void Write(MessageWriter* aWriter, const paramType& aParam); + static bool Read(MessageReader* aReader, paramType* aResult); +}; +} // namespace IPC + +#endif diff --git a/tools/profiler/rust-api/Cargo.toml b/tools/profiler/rust-api/Cargo.toml new file mode 100644 index 0000000000..93800051e4 --- /dev/null +++ b/tools/profiler/rust-api/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "gecko-profiler" +version = "0.1.0" +authors = ["The Mozilla Project Developers"] +edition = "2018" +license = "MPL-2.0" + +[dependencies] +profiler-macros = { path = "./macros" } +lazy_static = "1" +serde = { version = "1.0", features = ["derive"] } +bincode = "1" +mozbuild = "0.1" + +[build-dependencies] 
+lazy_static = "1" +bindgen = {version = "0.64", default-features = false} +mozbuild = "0.1" + +[features] +# This feature is being set by Gecko. If it's not set, all public functions and +# structs will be no-op. +enabled = [] diff --git a/tools/profiler/rust-api/README.md b/tools/profiler/rust-api/README.md new file mode 100644 index 0000000000..60926a85c7 --- /dev/null +++ b/tools/profiler/rust-api/README.md @@ -0,0 +1,5 @@ +# Gecko Profiler API for Rust + +This crate is the collection of all the API endpoints for Gecko Profiler. Please use this crate instead of using raw FFI calls. + +See the module documentations for more information about the specific API endpoints. diff --git a/tools/profiler/rust-api/build.rs b/tools/profiler/rust-api/build.rs new file mode 100644 index 0000000000..2dd70ed55c --- /dev/null +++ b/tools/profiler/rust-api/build.rs @@ -0,0 +1,118 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Build script for the Gecko Profiler bindings. +//! +//! This file is executed by cargo when this crate is built. It generates the +//! `$OUT_DIR/bindings.rs` file which is then included by `src/gecko_bindings/mod.rs`. + +#[macro_use] +extern crate lazy_static; + +use bindgen::{Builder, CargoCallbacks, CodegenConfig}; +use std::env; +use std::fs; +use std::path::PathBuf; + +lazy_static! { + static ref OUTDIR_PATH: PathBuf = PathBuf::from(env::var_os("OUT_DIR").unwrap()).join("gecko"); +} + +const BINDINGS_FILE: &str = "bindings.rs"; + +lazy_static! { + static ref BINDGEN_FLAGS: Vec = { + // Load build-specific config overrides. 
+ let path = mozbuild::TOPOBJDIR.join("tools/profiler/rust-api/extra-bindgen-flags"); + println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); + fs::read_to_string(path).expect("Failed to read extra-bindgen-flags file") + .split_whitespace() + .map(std::borrow::ToOwned::to_owned) + .collect() + }; + static ref SEARCH_PATHS: Vec = vec![ + mozbuild::TOPOBJDIR.join("dist/include"), + mozbuild::TOPOBJDIR.join("dist/include/nspr"), + ]; +} + +fn search_include(name: &str) -> Option { + for path in SEARCH_PATHS.iter() { + let file = path.join(name); + if file.is_file() { + return Some(file); + } + } + None +} + +fn add_include(name: &str) -> String { + let file = match search_include(name) { + Some(file) => file, + None => panic!("Include not found: {}", name), + }; + let file_path = String::from(file.to_str().unwrap()); + println!("cargo:rerun-if-changed={}", file_path); + file_path +} + +fn generate_bindings() { + let mut builder = Builder::default() + .enable_cxx_namespaces() + .with_codegen_config(CodegenConfig::TYPES | CodegenConfig::VARS | CodegenConfig::FUNCTIONS) + .disable_untagged_union() + .size_t_is_usize(true); + + for dir in SEARCH_PATHS.iter() { + builder = builder.clang_arg("-I").clang_arg(dir.to_str().unwrap()); + } + + builder = builder + .clang_arg("-include") + .clang_arg(add_include("mozilla-config.h")); + + for item in &*BINDGEN_FLAGS { + builder = builder.clang_arg(item); + } + + let bindings = builder + .header(add_include("GeckoProfiler.h")) + .header(add_include("ProfilerBindings.h")) + .allowlist_function("gecko_profiler_.*") + .allowlist_var("mozilla::profiler::detail::RacyFeatures::sActiveAndFeatures") + .allowlist_type("mozilla::profiler::detail::RacyFeatures") + .rustified_enum("mozilla::StackCaptureOptions") + .rustified_enum("mozilla::MarkerSchema_Location") + .rustified_enum("mozilla::MarkerSchema_Format") + .rustified_enum("mozilla::MarkerSchema_Searchable") + // Converting std::string to an opaque type makes some platforms 
build + // successfully. Otherwise, it fails to build because MarkerSchema has + // some std::strings as its fields. + .opaque_type("std::string") + // std::vector needs to be converted to an opaque type because, if it's + // not an opaque type, bindgen can't find its size properly and + // MarkerSchema's total size reduces. That causes a heap buffer overflow. + .opaque_type("std::vector") + .raw_line("pub use self::root::*;") + // Tell cargo to invalidate the built crate whenever any of the + // included header files changed. + .parse_callbacks(Box::new(CargoCallbacks)) + // Finish the builder and generate the bindings. + .generate() + // Unwrap the Result and panic on failure. + .expect("Unable to generate bindings"); + + let out_file = OUTDIR_PATH.join(BINDINGS_FILE); + bindings + .write_to_file(out_file) + .expect("Couldn't write bindings!"); +} + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:out_dir={}", env::var("OUT_DIR").unwrap()); + + fs::create_dir_all(&*OUTDIR_PATH).unwrap(); + generate_bindings(); +} diff --git a/tools/profiler/rust-api/cbindgen.toml b/tools/profiler/rust-api/cbindgen.toml new file mode 100644 index 0000000000..3f0df0f34f --- /dev/null +++ b/tools/profiler/rust-api/cbindgen.toml @@ -0,0 +1,15 @@ +header = """/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */""" +autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */ +#ifndef ProfilerRustBindings_h +#error "Don't include this file directly, instead include ProfilerRustBindings.h" +#endif +""" +include_version = true +braces = "SameLine" +line_length = 100 +tab_width = 2 +language = "C++" +# Put FFI calls in the `mozilla::profiler::ffi` namespace. 
+namespaces = ["mozilla", "profiler", "ffi"] diff --git a/tools/profiler/rust-api/extra-bindgen-flags.in b/tools/profiler/rust-api/extra-bindgen-flags.in new file mode 100644 index 0000000000..b0275a031b --- /dev/null +++ b/tools/profiler/rust-api/extra-bindgen-flags.in @@ -0,0 +1 @@ +@BINDGEN_SYSTEM_FLAGS@ @NSPR_CFLAGS@ diff --git a/tools/profiler/rust-api/macros/Cargo.toml b/tools/profiler/rust-api/macros/Cargo.toml new file mode 100644 index 0000000000..b8bd9910dc --- /dev/null +++ b/tools/profiler/rust-api/macros/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "profiler-macros" +version = "0.1.0" +authors = ["The Mozilla Project Developers"] +edition = "2018" +license = "MPL-2.0" + +[lib] +proc-macro = true + +[dependencies] +syn = "1" +quote = "1.0" diff --git a/tools/profiler/rust-api/macros/src/lib.rs b/tools/profiler/rust-api/macros/src/lib.rs new file mode 100644 index 0000000000..48617b758e --- /dev/null +++ b/tools/profiler/rust-api/macros/src/lib.rs @@ -0,0 +1,65 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#![deny(warnings)] + +//! A procedural macro as a syntactical sugar to `gecko_profiler_label!` macro. +//! You can use this macro on top of functions to automatically append the +//! label frame to the function. +//! +//! Example usage: +//! ```rust +//! #[gecko_profiler_fn_label(DOM)] +//! fn foo(bar: u32) -> u32 { +//! bar +//! } +//! +//! #[gecko_profiler_fn_label(Javascript, IonMonkey)] +//! pub fn bar(baz: i8) -> i8 { +//! baz +//! } +//! ``` +//! +//! See the documentation of `gecko_profiler_label!` macro to learn more about +//! its parameters. 
+ +extern crate proc_macro; + +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, AttributeArgs, ItemFn}; + +#[proc_macro_attribute] +pub fn gecko_profiler_fn_label(attrs: TokenStream, input: TokenStream) -> TokenStream { + let attr_args = parse_macro_input!(attrs as AttributeArgs); + let input = parse_macro_input!(input as ItemFn); + + if attr_args.is_empty() || attr_args.len() > 2 { + panic!("Expected one or two arguments as ProfilingCategory or ProfilingCategoryPair but {} arguments provided!", attr_args.len()); + } + + let category_name = &attr_args[0]; + // Try to get the subcategory if possible. Otherwise, use `None`. + let subcategory_if_provided = match attr_args.get(1) { + Some(subcategory) => quote!(, #subcategory), + None => quote!(), + }; + + let ItemFn { + attrs, + vis, + sig, + block, + } = input; + let stmts = &block.stmts; + + let new_fn = quote! { + #(#attrs)* #vis #sig { + gecko_profiler_label!(#category_name#subcategory_if_provided); + #(#stmts)* + } + }; + + new_fn.into() +} diff --git a/tools/profiler/rust-api/src/gecko_bindings/glue.rs b/tools/profiler/rust-api/src/gecko_bindings/glue.rs new file mode 100644 index 0000000000..531f727a00 --- /dev/null +++ b/tools/profiler/rust-api/src/gecko_bindings/glue.rs @@ -0,0 +1,53 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +use crate::gecko_bindings::{bindings, structs::mozilla}; +use crate::json_writer::JSONWriter; +use crate::marker::deserializer_tags_state::{ + get_marker_type_functions_read_guard, MarkerTypeFunctions, +}; +use std::ops::DerefMut; +use std::os::raw::{c_char, c_void}; + +#[no_mangle] +pub unsafe extern "C" fn gecko_profiler_serialize_marker_for_tag( + deserializer_tag: u8, + payload: *const u8, + payload_size: usize, + json_writer: &mut mozilla::baseprofiler::SpliceableJSONWriter, +) { + let marker_type_functions = get_marker_type_functions_read_guard(); + let &MarkerTypeFunctions { + transmute_and_stream_fn, + marker_type_name_fn, + .. + } = marker_type_functions.get(deserializer_tag); + let mut json_writer = JSONWriter::new(&mut *json_writer); + + // Serialize the marker type name first. + json_writer.string_property("type", marker_type_name_fn()); + // Serialize the marker payload now. + transmute_and_stream_fn(payload, payload_size, &mut json_writer); +} + +#[no_mangle] +pub unsafe extern "C" fn gecko_profiler_stream_marker_schemas( + json_writer: &mut mozilla::baseprofiler::SpliceableJSONWriter, + streamed_names_set: *mut c_void, +) { + let marker_type_functions = get_marker_type_functions_read_guard(); + + for funcs in marker_type_functions.iter() { + let marker_name = (funcs.marker_type_name_fn)(); + let mut marker_schema = (funcs.marker_type_display_fn)(); + + bindings::gecko_profiler_marker_schema_stream( + json_writer, + marker_name.as_ptr() as *const c_char, + marker_name.len(), + marker_schema.pin.deref_mut().as_mut_ptr(), + streamed_names_set, + ) + } +} diff --git a/tools/profiler/rust-api/src/gecko_bindings/mod.rs b/tools/profiler/rust-api/src/gecko_bindings/mod.rs new file mode 100644 index 0000000000..f1ec667bb2 --- /dev/null +++ b/tools/profiler/rust-api/src/gecko_bindings/mod.rs @@ -0,0 +1,21 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Gecko's C++ bindings for the profiler. + +#[allow( + dead_code, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + missing_docs +)] +pub mod structs { + include!(concat!(env!("OUT_DIR"), "/gecko/bindings.rs")); +} + +pub use self::structs as bindings; + +mod glue; +pub mod profiling_categories; diff --git a/tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs b/tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs new file mode 100644 index 0000000000..0f24aa9c35 --- /dev/null +++ b/tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs @@ -0,0 +1,32 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! This file contains the generated ProfilingCategory and ProfilingCategoryPair enums. +//! +//! The contents of this module are generated by +//! `mozglue/baseprofiler/generate_profiling_categories.py`, from +//! 'mozglue/baseprofiler/core/profiling_categories.yaml`. + +include!(mozbuild::objdir_path!( + "tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs" +)); + +/// Helper macro that returns the profiling category pair from either only +/// "category", or "category + sub category" pair. Refer to `profiling_categories.yaml` +/// or generated `profiling_categories.rs` to see all the marker categories. +/// This is useful to make the APIs similar to each other since +/// `gecko_profiler_label!` API also requires the same syntax. +/// +/// Example usages: +/// - `gecko_profiler_category!(DOM)` +/// - `gecko_profiler_category!(JavaScript, Parsing)` +#[macro_export] +macro_rules! 
gecko_profiler_category { + ($category:ident) => { + $crate::ProfilingCategoryPair::$category(None) + }; + ($category:ident, $subcategory:ident) => { + $crate::ProfilingCategoryPair::$category(Some($crate::$category::$subcategory)) + }; +} diff --git a/tools/profiler/rust-api/src/json_writer.rs b/tools/profiler/rust-api/src/json_writer.rs new file mode 100644 index 0000000000..8ab6f2ed99 --- /dev/null +++ b/tools/profiler/rust-api/src/json_writer.rs @@ -0,0 +1,86 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Gecko JSON writer support for marker API. + +use crate::gecko_bindings::{bindings, structs::mozilla}; +use std::os::raw::c_char; + +/// Wrapper for the C++ SpliceableJSONWriter object. It exposes some methods to +/// add various properties to the JSON. +#[derive(Debug)] +pub struct JSONWriter<'a>(&'a mut mozilla::baseprofiler::SpliceableJSONWriter); + +impl<'a> JSONWriter<'a> { + /// Constructor for the JSONWriter object. It takes a C++ SpliceableJSONWriter + /// reference as its argument and stores it for later accesses. + pub(crate) fn new(json_writer: &'a mut mozilla::baseprofiler::SpliceableJSONWriter) -> Self { + JSONWriter(json_writer) + } + + /// Adds an int property to the JSON. + /// Prints: "<name>": <value> + pub fn int_property(&mut self, name: &str, value: i64) { + unsafe { + bindings::gecko_profiler_json_writer_int_property( + self.0, + name.as_ptr() as *const c_char, + name.len(), + value, + ); + } + } + + /// Adds a float property to the JSON. + /// Prints: "<name>": <value> + pub fn float_property(&mut self, name: &str, value: f64) { + unsafe { + bindings::gecko_profiler_json_writer_float_property( + self.0, + name.as_ptr() as *const c_char, + name.len(), + value, + ); + } + } + + /// Adds a bool property to the JSON. 
+ /// Prints: "<name>": <value> + pub fn bool_property(&mut self, name: &str, value: bool) { + unsafe { + bindings::gecko_profiler_json_writer_bool_property( + self.0, + name.as_ptr() as *const c_char, + name.len(), + value, + ); + } + } + + /// Adds a string property to the JSON. + /// Prints: "<name>": "<value>" + pub fn string_property(&mut self, name: &str, value: &str) { + unsafe { + bindings::gecko_profiler_json_writer_string_property( + self.0, + name.as_ptr() as *const c_char, + name.len(), + value.as_ptr() as *const c_char, + value.len(), + ); + } + } + + /// Adds a null property to the JSON. + /// Prints: "<name>": null + pub fn null_property(&mut self, name: &str) { + unsafe { + bindings::gecko_profiler_json_writer_null_property( + self.0, + name.as_ptr() as *const c_char, + name.len(), + ); + } + } +} diff --git a/tools/profiler/rust-api/src/label.rs b/tools/profiler/rust-api/src/label.rs new file mode 100644 index 0000000000..10970c90ad --- /dev/null +++ b/tools/profiler/rust-api/src/label.rs @@ -0,0 +1,137 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Gecko profiler label support. +//! +//! Use the `gecko_profiler_label!` macro directly instead of using `AutoProfilerLabel`. +//! See the `gecko_profiler_label!` macro documentation on how to use it. + +#[cfg(feature = "enabled")] +use crate::gecko_bindings::{ + bindings, profiling_categories::ProfilingCategoryPair, structs::mozilla, +}; + +/// RAII object that constructs and destroys a C++ AutoProfilerLabel object +/// pointed to by the specified reference. +/// Use `gecko_profiler_label!` macro directly instead of this, if possible. +#[cfg(feature = "enabled")] +pub struct AutoProfilerLabel<'a>(&'a mut mozilla::AutoProfilerLabel); + +#[cfg(feature = "enabled")] +impl<'a> AutoProfilerLabel<'a> { + /// Creates a new AutoProfilerLabel with the specified label type. 
+ /// + /// unsafe since the caller must ensure that `label` is allocated on the + /// stack. + #[inline] + pub unsafe fn new( + label: &mut std::mem::MaybeUninit, + category_pair: ProfilingCategoryPair, + ) -> AutoProfilerLabel { + bindings::gecko_profiler_construct_label( + label.as_mut_ptr(), + category_pair.to_cpp_enum_value(), + ); + AutoProfilerLabel(&mut *label.as_mut_ptr()) + } +} + +#[cfg(feature = "enabled")] +impl<'a> Drop for AutoProfilerLabel<'a> { + #[inline] + fn drop(&mut self) { + unsafe { + bindings::gecko_profiler_destruct_label(self.0); + } + } +} + +/// Place a Gecko profiler label on the stack. +/// +/// The first `category` argument must be the name of a variant of `ProfilingCategoryPair` +/// and the second optional `subcategory` argument must be one of the sub variants of +/// `ProfilingCategoryPair`. All options can be seen either in the +/// profiling_categories.yaml file or generated profiling_categories.rs file. +/// +/// Example usage: +/// ```rust +/// gecko_profiler_label!(Layout); +/// gecko_profiler_label!(JavaScript, Parsing); +/// ``` +/// You can wrap this macro with a block to only label a specific part of a function. +#[cfg(feature = "enabled")] +#[macro_export] +macro_rules! gecko_profiler_label { + ($category:ident) => { + gecko_profiler_label!($crate::ProfilingCategoryPair::$category(None)) + }; + ($category:ident, $subcategory:ident) => { + gecko_profiler_label!($crate::ProfilingCategoryPair::$category(Some( + $crate::$category::$subcategory + ))) + }; + + ($category_path:expr) => { + let mut _profiler_label = ::std::mem::MaybeUninit::< + $crate::gecko_bindings::structs::mozilla::AutoProfilerLabel, + >::uninit(); + let _profiler_label = if $crate::is_active() { + unsafe { + Some($crate::AutoProfilerLabel::new( + &mut _profiler_label, + $category_path, + )) + } + } else { + None + }; + }; +} + +/// No-op when MOZ_GECKO_PROFILER is not defined. +#[cfg(not(feature = "enabled"))] +#[macro_export] +macro_rules! 
gecko_profiler_label { + ($category:ident) => {}; + ($category:ident, $subcategory:ident) => {}; +} + +#[cfg(test)] +mod tests { + use profiler_macros::gecko_profiler_fn_label; + + #[test] + fn test_gecko_profiler_label() { + gecko_profiler_label!(Layout); + gecko_profiler_label!(JavaScript, Parsing); + } + + #[gecko_profiler_fn_label(DOM)] + fn foo(bar: u32) -> u32 { + bar + } + + #[gecko_profiler_fn_label(Javascript, IonMonkey)] + pub(self) fn bar(baz: i8) -> i8 { + baz + } + + struct A; + + impl A { + #[gecko_profiler_fn_label(Idle)] + pub fn test(&self) -> i8 { + 1 + } + } + + #[test] + fn test_gecko_profiler_fn_label() { + let _: u32 = foo(100000); + let _: i8 = bar(127); + + let a = A; + let _ = a.test(); + } +} diff --git a/tools/profiler/rust-api/src/lib.rs b/tools/profiler/rust-api/src/lib.rs new file mode 100644 index 0000000000..3c857ae8ac --- /dev/null +++ b/tools/profiler/rust-api/src/lib.rs @@ -0,0 +1,29 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//!
Profiler Rust API + +#[macro_use] +extern crate lazy_static; + +pub mod gecko_bindings; +mod json_writer; +mod label; +mod marker; +mod profiler_state; +mod thread; +mod time; + +pub use gecko_bindings::profiling_categories::*; +pub use json_writer::*; +pub use label::*; +pub use marker::options::*; +pub use marker::schema::MarkerSchema; +pub use marker::*; +pub use profiler_macros::gecko_profiler_fn_label; +pub use profiler_state::*; +pub use thread::*; +pub use time::*; + +pub use serde::{Deserialize, Serialize}; diff --git a/tools/profiler/rust-api/src/marker/deserializer_tags_state.rs b/tools/profiler/rust-api/src/marker/deserializer_tags_state.rs new file mode 100644 index 0000000000..890cc3f263 --- /dev/null +++ b/tools/profiler/rust-api/src/marker/deserializer_tags_state.rs @@ -0,0 +1,116 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::json_writer::JSONWriter; +use crate::marker::schema::MarkerSchema; +use crate::marker::{transmute_and_stream, ProfilerMarker}; +use std::collections::HashMap; +use std::sync::{RwLock, RwLockReadGuard}; + +lazy_static! { + static ref DESERIALIZER_TAGS_STATE: RwLock = + RwLock::new(DeserializerTagsState::new()); +} + +/// A state that keeps track of each marker types and their deserializer tags. +/// They are added during the marker insertion and read during the marker serialization. +pub struct DeserializerTagsState { + /// C++ side accepts only u8 values, but we only know usize values as the + /// unique marker type values. So, we need to keep track of each + /// "marker tag -> deserializer tag" conversions to directly get the + /// deserializer tags of the already added marker types. + pub marker_tag_to_deserializer_tag: HashMap, + /// Vector of marker type functions. + /// 1-based, i.e.: [0] -> tag 1. 
Elements are pushed to the end of the vector + /// whenever a new marker type is used in a Firefox session; the content is + /// kept between profiler runs in that session. On the C++ side, we have the + /// same algorithm (although it's a sized array). See `sMarkerTypeFunctions1Based`. + pub marker_type_functions_1_based: Vec, +} + +/// Functions that will be stored per marker type, so we can serialize the marker +/// schema and stream the marker payload for a specific type. +pub struct MarkerTypeFunctions { + /// A function that returns the name of the marker type. + pub marker_type_name_fn: fn() -> &'static str, + /// A function that returns a `MarkerSchema`, which contains all the + /// information needed to stream the display schema associated with a + /// marker type. + pub marker_type_display_fn: fn() -> MarkerSchema, + /// A function that can read a serialized payload from bytes and streams it + /// as JSON object properties. + pub transmute_and_stream_fn: + unsafe fn(payload: *const u8, payload_size: usize, json_writer: &mut JSONWriter), +} + +impl DeserializerTagsState { + fn new() -> Self { + DeserializerTagsState { + marker_tag_to_deserializer_tag: HashMap::new(), + marker_type_functions_1_based: vec![], + } + } +} + +/// Get or insert the deserializer tag for each marker type. The tag storage +/// is limited to 255 marker types. This is the same with the C++ side. It's +/// unlikely to reach this limit, but if that's the case, C++ side needs +/// to change the uint8_t type for the deserializer tag as well. +pub fn get_or_insert_deserializer_tag() -> u8 +where + T: ProfilerMarker, +{ + let unique_marker_tag = &T::marker_type_name as *const _ as usize; + let mut state = DESERIALIZER_TAGS_STATE.write().unwrap(); + + match state.marker_tag_to_deserializer_tag.get(&unique_marker_tag) { + None => { + // It's impossible to have length more than u8. 
+ let deserializer_tag = state.marker_type_functions_1_based.len() as u8 + 1; + debug_assert!( + deserializer_tag < 250, + "Too many rust marker payload types! Please consider increasing the profiler \ + buffer tag size." + ); + + state + .marker_tag_to_deserializer_tag + .insert(unique_marker_tag, deserializer_tag); + state + .marker_type_functions_1_based + .push(MarkerTypeFunctions { + marker_type_name_fn: T::marker_type_name, + marker_type_display_fn: T::marker_type_display, + transmute_and_stream_fn: transmute_and_stream::, + }); + deserializer_tag + } + Some(deserializer_tag) => *deserializer_tag, + } +} + +/// A guard that will be used by the marker FFI functions for getting marker type functions. +pub struct MarkerTypeFunctionsReadGuard { + guard: RwLockReadGuard<'static, DeserializerTagsState>, +} + +impl MarkerTypeFunctionsReadGuard { + pub fn iter<'a>(&'a self) -> impl Iterator { + self.guard.marker_type_functions_1_based.iter() + } + + pub fn get<'a>(&'a self, deserializer_tag: u8) -> &'a MarkerTypeFunctions { + self.guard + .marker_type_functions_1_based + .get(deserializer_tag as usize - 1) + .expect("Failed to find the marker type functions for given deserializer tag") + } +} + +/// Locks the DESERIALIZER_TAGS_STATE and returns the marker type functions read guard. +pub fn get_marker_type_functions_read_guard() -> MarkerTypeFunctionsReadGuard { + MarkerTypeFunctionsReadGuard { + guard: DESERIALIZER_TAGS_STATE.read().unwrap(), + } +} diff --git a/tools/profiler/rust-api/src/marker/mod.rs b/tools/profiler/rust-api/src/marker/mod.rs new file mode 100644 index 0000000000..984a475089 --- /dev/null +++ b/tools/profiler/rust-api/src/marker/mod.rs @@ -0,0 +1,284 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! ## Gecko profiler marker support +//! +//! 
This marker API has a few different functions that you can use to mark a part of your code. +//! There are three main marker functions to use from Rust: [`add_untyped_marker`], +//! [`add_text_marker`] and [`add_marker`]. They are similar to what we have on +//! the C++ side. Please take a look at the marker documentation in the Firefox +//! source docs to learn more about them: +//! https://firefox-source-docs.mozilla.org/tools/profiler/markers-guide.html +//! +//! ### Simple marker without any additional data +//! +//! The simplest way to add a marker without any additional information is the +//! [`add_untyped_marker`] API. You can use it to mark a part of the code with +//! only a name. E.g.: +//! +//! ``` +//! gecko_profiler::add_untyped_marker( +//! // Name of the marker as a string. +//! "Marker Name", +//! // Category with an optional sub-category. +//! gecko_profiler_category!(Graphics, DisplayListBuilding), +//! // MarkerOptions that keeps options like marker timing and marker stack. +//! Default::default(), +//! ); +//! ``` +//! +//! Please see the [`gecko_profiler_category!`], [`MarkerOptions`],[`MarkerTiming`] +//! and [`MarkerStack`] to learn more about these. +//! +//! You can also give explicit [`MarkerOptions`] value like these: +//! +//! ``` +//! // With both timing and stack fields: +//! MarkerOptions { timing: MarkerTiming::instant_now(), stack: MarkerStack::Full } +//! // Or with some fields as default: +//! MarkerOptions { timing: MarkerTiming::instant_now(), ..Default::default() } +//! ``` +//! +//! ### Marker with only an additional text for more information: +//! +//! The next and slightly more advanced API is [`add_text_marker`]. +//! This is used to add a marker name + a string value for extra information. +//! E.g.: +//! +//! ``` +//! let info = "info about this marker"; +//! ... +//! gecko_profiler::add_text_marker( +//! // Name of the marker as a string. +//! "Marker Name", +//! // Category with an optional sub-category. +//! 
gecko_profiler_category!(DOM), +//! // MarkerOptions that keeps options like marker timing and marker stack. +//! MarkerOptions { +//! timing: MarkerTiming::instant_now(), +//! ..Default::default() +//! }, +//! // Additional information as a string. +//! info, +//! ); +//! ``` +//! +//! ### Marker with a more complex payload and different visualization in the profiler front-end. +//! +//! [`add_marker`] is the most advanced API that you can use to add different types +//! of values as data to your marker and customize the visualization of that marker +//! in the profiler front-end (profiler.firefox.com). +//! +//! To be able to add a marker, first you need to create your marker payload +//! struct in your codebase and implement the [`ProfilerMarker`] trait like this: +//! +//! ``` +//! #[derive(Serialize, Deserialize, Debug)] +//! pub struct TestMarker { +//! a: u32, +//! b: String, +//! } +//! +//! // Please see the documentation of [`ProfilerMarker`]. +//! impl gecko_profiler::ProfilerMarker for TestMarker { +//! fn marker_type_name() -> &'static str { +//! "marker type from rust" +//! } +//! fn marker_type_display() -> gecko_profiler::MarkerSchema { +//! use gecko_profiler::marker::schema::*; +//! let mut schema = MarkerSchema::new(&[Location::MarkerChart]); +//! schema.set_chart_label("Name: {marker.name}"); +//! schema.set_tooltip_label("{marker.data.a}"); +//! schema.add_key_label_format("a", "A Value", Format::Integer); +//! schema.add_key_label_format("b", "B Value", Format::String); +//! schema +//! } +//! fn stream_json_marker_data(&self, json_writer: &mut gecko_profiler::JSONWriter) { +//! json_writer.int_property("a", self.a.into()); +//! json_writer.string_property("b", &self.b); +//! } +//! } +//! ``` +//! +//! Once you've created this payload and implemented the [`ProfilerMarker`], you +//! can now add this marker in the code that you would like to measure. E.g.: +//! +//! ``` +//! gecko_profiler::add_marker( +//! // Name of the marker as a string.
+//! "Marker Name", +//! // Category with an optional sub-category. +//! gecko_profiler_category!(Graphics, DisplayListBuilding), +//! // MarkerOptions that keeps options like marker timing and marker stack. +//! Default::default(), +//! // Marker payload. +//! TestMarker {a: 12, b: "hello".to_owned()}, +//! ); +//! ``` + +pub(crate) mod deserializer_tags_state; +pub mod options; +pub mod schema; + +pub use options::*; +pub use schema::MarkerSchema; + +use crate::gecko_bindings::{bindings, profiling_categories::ProfilingCategoryPair}; +use crate::json_writer::JSONWriter; +use crate::marker::deserializer_tags_state::get_or_insert_deserializer_tag; +use crate::marker::options::MarkerOptions; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use std::os::raw::c_char; + +/// Marker API to add a new simple marker without any payload. +/// Please see the module documentation on how to add a marker with this API. +pub fn add_untyped_marker(name: &str, category: ProfilingCategoryPair, mut options: MarkerOptions) { + if !crate::profiler_state::can_accept_markers() { + // Nothing to do. + return; + } + + unsafe { + bindings::gecko_profiler_add_marker_untyped( + name.as_ptr() as *const c_char, + name.len(), + category.to_cpp_enum_value(), + options.timing.0.as_mut_ptr(), + options.stack, + ) + } +} + +/// Marker API to add a new marker with additional text for details. +/// Please see the module documentation on how to add a marker with this API. +pub fn add_text_marker( + name: &str, + category: ProfilingCategoryPair, + mut options: MarkerOptions, + text: &str, +) { + if !crate::profiler_state::can_accept_markers() { + // Nothing to do. 
+ return; + } + + unsafe { + bindings::gecko_profiler_add_marker_text( + name.as_ptr() as *const c_char, + name.len(), + category.to_cpp_enum_value(), + options.timing.0.as_mut_ptr(), + options.stack, + text.as_ptr() as *const c_char, + text.len(), + ) + } +} + +/// Trait that every profiler marker payload struct needs to implement. +/// This will tell the profiler back-end how to serialize it as json and +/// the front-end how to display the marker. +/// Please also see the documentation here: +/// https://firefox-source-docs.mozilla.org/tools/profiler/markers-guide.html#how-to-define-new-marker-types +/// +/// - `marker_type_name`: Returns a static string as the marker type name. This +/// should be unique and it is used to keep track of the type of markers in the +/// profiler storage, and to identify them uniquely on the profiler front-end. +/// - `marker_type_display`: Where and how to display the marker and its data. +/// Returns a `MarkerSchema` object which will be forwarded to the profiler +/// front-end. +/// - `stream_json_marker_data`: Data specific to this marker type should be +/// serialized to JSON for the profiler front-end. All the common marker data +/// like marker name, category, timing will be serialized automatically. But +/// marker specific data should be serialized here. +pub trait ProfilerMarker: Serialize + DeserializeOwned { + /// A static method that returns the name of the marker type. + fn marker_type_name() -> &'static str; + /// A static method that returns a `MarkerSchema`, which contains all the + /// information needed to stream the display schema associated with a + /// marker type. + fn marker_type_display() -> schema::MarkerSchema; + /// A method that streams the marker payload data as JSON object properties. + /// Please see the [JSONWriter] struct to see its methods. + fn stream_json_marker_data(&self, json_writer: &mut JSONWriter); +} + +/// A function that deserializes the marker payload and streams it to the JSON. 
+unsafe fn transmute_and_stream( + payload: *const u8, + payload_size: usize, + json_writer: &mut JSONWriter, +) where + T: ProfilerMarker, +{ + let payload_slice = std::slice::from_raw_parts(payload, payload_size); + let payload: T = bincode::deserialize(&payload_slice).unwrap(); + payload.stream_json_marker_data(json_writer); +} + +/// Main marker API to add a new marker to profiler buffer. +/// Please see the module documentation on how to add a marker with this API. +pub fn add_marker( + name: &str, + category: ProfilingCategoryPair, + mut options: MarkerOptions, + payload: T, +) where + T: ProfilerMarker, +{ + if !crate::profiler_state::can_accept_markers() { + // Nothing to do. + return; + } + + let encoded_payload: Vec = bincode::serialize(&payload).unwrap(); + let payload_size = encoded_payload.len(); + let maker_tag = get_or_insert_deserializer_tag::(); + + unsafe { + bindings::gecko_profiler_add_marker( + name.as_ptr() as *const c_char, + name.len(), + category.to_cpp_enum_value(), + options.timing.0.as_mut_ptr(), + options.stack, + maker_tag, + encoded_payload.as_ptr(), + payload_size, + ) + } +} + +/// Tracing marker type for Rust code. +/// This must be kept in sync with the `mozilla::baseprofiler::markers::Tracing` +/// C++ counterpart. +#[derive(Serialize, Deserialize, Debug)] +pub struct Tracing(pub String); + +impl ProfilerMarker for Tracing { + fn marker_type_name() -> &'static str { + "tracing" + } + + fn stream_json_marker_data(&self, json_writer: &mut JSONWriter) { + if self.0.len() != 0 { + json_writer.string_property("category", &self.0); + } + } + + // Tracing marker is a bit special because we have the same schema in the + // C++ side. This function will only get called when no Tracing markers are + // generated from the C++ side. But, most of the time, this will not be called + // when there is another C++ Tracing marker. 
+ fn marker_type_display() -> schema::MarkerSchema { + use crate::marker::schema::*; + let mut schema = MarkerSchema::new(&[ + Location::MarkerChart, + Location::MarkerTable, + Location::TimelineOverview, + ]); + schema.add_key_label_format("category", "Type", Format::String); + schema + } +} diff --git a/tools/profiler/rust-api/src/marker/options.rs b/tools/profiler/rust-api/src/marker/options.rs new file mode 100644 index 0000000000..a5d4e11094 --- /dev/null +++ b/tools/profiler/rust-api/src/marker/options.rs @@ -0,0 +1,138 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Different options for the marker API. +//! See [`MarkerOptions`] and its fields. + +use crate::gecko_bindings::{bindings, structs::mozilla}; +use crate::ProfilerTime; +use std::mem::MaybeUninit; + +/// Marker option that contains marker timing information. +/// This class encapsulates the logic for correctly storing a marker based on its +/// constructor types. Use the static methods to create the MarkerTiming. This is +/// a transient object that is being used to enforce the constraints of the +/// combinations of the data. +/// +/// Implementation details: This is a RAII object that constructs and destroys a +/// C++ MarkerTiming object pointed to a specified reference. It allocates the +/// marker timing on stack and it's safe to move around because it's a +/// trivially-copyable object that only contains a few numbers. +#[derive(Debug)] +pub struct MarkerTiming(pub(crate) MaybeUninit); + +impl MarkerTiming { + /// Instant marker timing at a specific time. 
+ pub fn instant_at(time: ProfilerTime) -> MarkerTiming { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_marker_timing_instant_at( + marker_timing.as_mut_ptr(), + &time.0, + ); + } + MarkerTiming(marker_timing) + } + + /// Instant marker timing at this time. + pub fn instant_now() -> MarkerTiming { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_marker_timing_instant_now( + marker_timing.as_mut_ptr(), + ); + } + MarkerTiming(marker_timing) + } + + /// Interval marker timing with start and end times. + pub fn interval(start_time: ProfilerTime, end_time: ProfilerTime) -> MarkerTiming { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_marker_timing_interval( + marker_timing.as_mut_ptr(), + &start_time.0, + &end_time.0, + ); + } + MarkerTiming(marker_timing) + } + + /// Interval marker with a start time and end time as "now". + pub fn interval_until_now_from(start_time: ProfilerTime) -> MarkerTiming { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_marker_timing_interval_until_now_from( + marker_timing.as_mut_ptr(), + &start_time.0, + ); + } + MarkerTiming(marker_timing) + } + + /// Interval start marker with only start time. This is a partial marker and + /// it requires another marker with `interval_end` to be complete. + pub fn interval_start(time: ProfilerTime) -> MarkerTiming { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_marker_timing_interval_start( + marker_timing.as_mut_ptr(), + &time.0, + ); + } + MarkerTiming(marker_timing) + } + + /// Interval end marker with only end time. This is a partial marker and + /// it requires another marker with `interval_start` to be complete.
+ pub fn interval_end(time: ProfilerTime) -> MarkerTiming { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_marker_timing_interval_end( + marker_timing.as_mut_ptr(), + &time.0, + ); + } + MarkerTiming(marker_timing) + } +} + +impl Default for MarkerTiming { + fn default() -> Self { + MarkerTiming::instant_now() + } +} + +impl Drop for MarkerTiming { + fn drop(&mut self) { + unsafe { + bindings::gecko_profiler_destruct_marker_timing(self.0.as_mut_ptr()); + } + } +} + +/// Marker option that contains marker stack information. +pub type MarkerStack = mozilla::StackCaptureOptions; + +impl Default for MarkerStack { + fn default() -> Self { + MarkerStack::NoStack + } +} + +/// This class combines each of the possible marker options above. +/// Use Default::default() for the options that you don't want to provide or the +/// options you want to leave as default. Example usage: +/// +/// ```rust +/// MarkerOptions { +/// timing: MarkerTiming::instant_now(), +/// ..Default::default() +/// } +/// ``` +#[derive(Debug, Default)] +pub struct MarkerOptions { + pub timing: MarkerTiming, + pub stack: MarkerStack, +} diff --git a/tools/profiler/rust-api/src/marker/schema.rs b/tools/profiler/rust-api/src/marker/schema.rs new file mode 100644 index 0000000000..9368582f11 --- /dev/null +++ b/tools/profiler/rust-api/src/marker/schema.rs @@ -0,0 +1,233 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! [`MarkerSchema`] and other enums that will be used by `MarkerSchema`. + +use crate::gecko_bindings::{bindings, structs::mozilla}; +use std::mem::MaybeUninit; +use std::ops::DerefMut; +use std::os::raw::c_char; +use std::pin::Pin; + +/// Marker locations to be displayed in the profiler front-end. 
+pub type Location = mozilla::MarkerSchema_Location; + +/// Formats of marker properties for profiler front-end. +pub type Format = mozilla::MarkerSchema_Format; + +/// Whether it's searchable or not in the profiler front-end. +pub type Searchable = mozilla::MarkerSchema_Searchable; + +/// This object collects all the information necessary to stream the JSON schema +/// that informs the front-end how to display a type of markers. +/// It will be created and populated in `marker_type_display()` functions in each +/// marker type definition, see add/set functions. +/// +/// It's a RAII object that constructs and destroys a C++ MarkerSchema object +/// pointed to a specified reference. +pub struct MarkerSchema { + pub(crate) pin: Pin>>, +} + +impl MarkerSchema { + // Initialize a marker schema with the given `Location`s. + pub fn new(locations: &[Location]) -> Self { + let mut marker_schema = Box::pin(std::mem::MaybeUninit::::uninit()); + + unsafe { + bindings::gecko_profiler_construct_marker_schema( + marker_schema.deref_mut().as_mut_ptr(), + locations.as_ptr(), + locations.len(), + ); + } + MarkerSchema { pin: marker_schema } + } + + /// Marker schema for types that have special frontend handling. + /// Nothing else should be set in this case. + pub fn new_with_special_frontend_location() -> Self { + let mut marker_schema = Box::pin(std::mem::MaybeUninit::::uninit()); + unsafe { + bindings::gecko_profiler_construct_marker_schema_with_special_front_end_location( + marker_schema.deref_mut().as_mut_ptr(), + ); + } + MarkerSchema { pin: marker_schema } + } + + /// Optional label in the marker chart. + /// If not provided, the marker "name" will be used. The given string + /// can contain element keys in braces to include data elements streamed by + /// `stream_json_marker_data()`. 
E.g.: "This is {marker.data.text}" + pub fn set_chart_label(&mut self, label: &str) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_set_chart_label( + self.pin.deref_mut().as_mut_ptr(), + label.as_ptr() as *const c_char, + label.len(), + ); + } + self + } + + /// Optional label in the marker chart tooltip. + /// If not provided, the marker "name" will be used. The given string + /// can contain element keys in braces to include data elements streamed by + /// `stream_json_marker_data()`. E.g.: "This is {marker.data.text}" + pub fn set_tooltip_label(&mut self, label: &str) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_set_tooltip_label( + self.pin.deref_mut().as_mut_ptr(), + label.as_ptr() as *const c_char, + label.len(), + ); + } + self + } + + /// Optional label in the marker table. + /// If not provided, the marker "name" will be used. The given string + /// can contain element keys in braces to include data elements streamed by + /// `stream_json_marker_data()`. E.g.: "This is {marker.data.text}" + pub fn set_table_label(&mut self, label: &str) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_set_table_label( + self.pin.deref_mut().as_mut_ptr(), + label.as_ptr() as *const c_char, + label.len(), + ); + } + self + } + + /// Set all marker chart / marker tooltip / marker table labels with the same text. + /// Same as the individual methods, the given string can contain element keys + /// in braces to include data elements streamed by `stream_json_marker_data()`. + /// E.g.: "This is {marker.data.text}" + pub fn set_all_labels(&mut self, label: &str) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_set_all_labels( + self.pin.deref_mut().as_mut_ptr(), + label.as_ptr() as *const c_char, + label.len(), + ); + } + self + } + + // Each data element that is streamed by `stream_json_marker_data()` can be + // displayed as indicated by using one of the `add_...` function below. 
+ // Each `add...` will add a line in the full marker description. Parameters: + // - `key`: Element property name as streamed by `stream_json_marker_data()`. + // - `label`: Optional label. Defaults to the key name. + // - `format`: How to format the data element value, see `Format` above. + // - `searchable`: Optional, indicates if the value is used in searches, + // defaults to false. + + /// Add a key / format row for the marker data element. + /// - `key`: Element property name as streamed by `stream_json_marker_data()`. + /// - `format`: How to format the data element value, see `Format` above. + pub fn add_key_format(&mut self, key: &str, format: Format) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_add_key_format( + self.pin.deref_mut().as_mut_ptr(), + key.as_ptr() as *const c_char, + key.len(), + format, + ); + } + self + } + + /// Add a key / label / format row for the marker data element. + /// - `key`: Element property name as streamed by `stream_json_marker_data()`. + /// - `label`: Optional label. Defaults to the key name. + /// - `format`: How to format the data element value, see `Format` above. + pub fn add_key_label_format(&mut self, key: &str, label: &str, format: Format) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_add_key_label_format( + self.pin.deref_mut().as_mut_ptr(), + key.as_ptr() as *const c_char, + key.len(), + label.as_ptr() as *const c_char, + label.len(), + format, + ); + } + self + } + + /// Add a key / format / searchable row for the marker data element. + /// - `key`: Element property name as streamed by `stream_json_marker_data()`. + /// - `format`: How to format the data element value, see `Format` above. 
+ pub fn add_key_format_searchable( + &mut self, + key: &str, + format: Format, + searchable: Searchable, + ) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_add_key_format_searchable( + self.pin.deref_mut().as_mut_ptr(), + key.as_ptr() as *const c_char, + key.len(), + format, + searchable, + ); + } + self + } + + /// Add a key / label / format / searchable row for the marker data element. + /// - `key`: Element property name as streamed by `stream_json_marker_data()`. + /// - `label`: Optional label. Defaults to the key name. + /// - `format`: How to format the data element value, see `Format` above. + /// - `searchable`: Optional, indicates if the value is used in searches, + /// defaults to false. + pub fn add_key_label_format_searchable( + &mut self, + key: &str, + label: &str, + format: Format, + searchable: Searchable, + ) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_add_key_label_format_searchable( + self.pin.deref_mut().as_mut_ptr(), + key.as_ptr() as *const c_char, + key.len(), + label.as_ptr() as *const c_char, + label.len(), + format, + searchable, + ); + } + self + } + + /// Add a label / value static row. + /// - `label`: Label of the static row. + /// - `value`: Static value to display.
+ pub fn add_static_label_value(&mut self, label: &str, value: &str) -> &mut Self { + unsafe { + bindings::gecko_profiler_marker_schema_add_static_label_value( + self.pin.deref_mut().as_mut_ptr(), + label.as_ptr() as *const c_char, + label.len(), + value.as_ptr() as *const c_char, + value.len(), + ); + } + self + } +} + +impl Drop for MarkerSchema { + fn drop(&mut self) { + unsafe { + bindings::gecko_profiler_destruct_marker_schema(self.pin.deref_mut().as_mut_ptr()); + } + } +} diff --git a/tools/profiler/rust-api/src/profiler_state.rs b/tools/profiler/rust-api/src/profiler_state.rs new file mode 100644 index 0000000000..0d5359684d --- /dev/null +++ b/tools/profiler/rust-api/src/profiler_state.rs @@ -0,0 +1,78 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Gecko profiler state. + +/// Whether the Gecko profiler is currently active. +/// A typical use of this API: +/// ```rust +/// if gecko_profiler::is_active() { +/// // do something. +/// } +/// ``` +/// +/// This implementation must be kept in sync with +/// `mozilla::profiler::detail::RacyFeatures::IsActive`. +#[cfg(feature = "enabled")] +#[inline] +pub fn is_active() -> bool { + use crate::gecko_bindings::structs::mozilla::profiler::detail; + + let active_and_features = get_active_and_features(); + (active_and_features & detail::RacyFeatures_Active) != 0 +} + +/// Always false when MOZ_GECKO_PROFILER is not defined. +#[cfg(not(feature = "enabled"))] +#[inline] +pub fn is_active() -> bool { + false +} + +/// Whether the Gecko Profiler can accept markers. +/// Similar to `is_active`, but with some extra checks that determine if the +/// profiler would currently store markers. So this should be used before +/// doing some potentially-expensive work that's used in a marker. 
E.g.: +/// +/// ```rust +/// if gecko_profiler::can_accept_markers() { +/// // Do something expensive and add the marker with that data. +/// } +/// ``` +/// +/// This implementation must be kept in sync with +/// `mozilla::profiler::detail::RacyFeatures::IsActiveAndUnpaused`. +#[cfg(feature = "enabled")] +#[inline] +pub fn can_accept_markers() -> bool { + use crate::gecko_bindings::structs::mozilla::profiler::detail; + + let active_and_features = get_active_and_features(); + (active_and_features & detail::RacyFeatures_Active) != 0 + && (active_and_features & detail::RacyFeatures_Paused) == 0 +} + +/// Always false when MOZ_GECKO_PROFILER is not defined. +#[cfg(not(feature = "enabled"))] +#[inline] +pub fn can_accept_markers() -> bool { + false +} + +/// Returns the value of atomic `RacyFeatures::sActiveAndFeatures` from the C++ side. +#[cfg(feature = "enabled")] +#[inline] +fn get_active_and_features() -> u32 { + use crate::gecko_bindings::structs::mozilla::profiler::detail; + use std::mem; + use std::sync::atomic::{AtomicU32, Ordering}; + + // This is reaching for the C++ atomic value instead of calling an FFI + // function to return this value. Because, calling an FFI function is much + // more expensive compared to this method. That's why it's worth going with + // this solution for performance. But it's crucial to keep the implementation + // of this and the callers in sync with the C++ counterparts. + unsafe { mem::transmute::<_, &AtomicU32>(&detail::RacyFeatures_sActiveAndFeatures) } + .load(Ordering::Relaxed) +} diff --git a/tools/profiler/rust-api/src/thread.rs b/tools/profiler/rust-api/src/thread.rs new file mode 100644 index 0000000000..353469a4bb --- /dev/null +++ b/tools/profiler/rust-api/src/thread.rs @@ -0,0 +1,23 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//!
Profiler API for thread registration and unregistration. +use crate::gecko_bindings::bindings; +use std::ffi::CString; + +/// Register a thread with the Gecko Profiler. +pub fn register_thread(thread_name: &str) { + let name = CString::new(thread_name).unwrap(); + unsafe { + // gecko_profiler_register_thread copies the passed name here. + bindings::gecko_profiler_register_thread(name.as_ptr()); + } +} + +/// Unregister a thread with the Gecko Profiler. +pub fn unregister_thread() { + unsafe { + bindings::gecko_profiler_unregister_thread(); + } +} diff --git a/tools/profiler/rust-api/src/time.rs b/tools/profiler/rust-api/src/time.rs new file mode 100644 index 0000000000..56315690c9 --- /dev/null +++ b/tools/profiler/rust-api/src/time.rs @@ -0,0 +1,71 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Gecko profiler time. + +use crate::gecko_bindings::{bindings, structs::mozilla}; +use std::mem::MaybeUninit; + +/// Profiler time for the marker API. +/// This should be used as the `MarkerTiming` parameter. +/// E.g.: +/// +/// ``` +/// let start = ProfilerTime::now(); +/// // ...some code... 
+/// gecko_profiler::add_untyped_marker( +/// "marker name", +/// category, +/// MarkerOptions { +/// timing: MarkerTiming::interval_until_now_from(start), +/// ..Default::default() +/// }, +/// ); +/// ``` +#[derive(Debug)] +pub struct ProfilerTime(pub(crate) mozilla::TimeStamp); + +impl ProfilerTime { + pub fn now() -> ProfilerTime { + let mut marker_timing = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_construct_timestamp_now(marker_timing.as_mut_ptr()); + ProfilerTime(marker_timing.assume_init()) + } + } + + pub fn add_microseconds(self, microseconds: f64) -> Self { + let mut dest = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_add_timestamp(&self.0, dest.as_mut_ptr(), microseconds); + ProfilerTime(dest.assume_init()) + } + } + + pub fn subtract_microseconds(self, microseconds: f64) -> Self { + let mut dest = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_subtract_timestamp(&self.0, dest.as_mut_ptr(), microseconds); + ProfilerTime(dest.assume_init()) + } + } +} + +impl Clone for ProfilerTime { + fn clone(&self) -> Self { + let mut dest = MaybeUninit::::uninit(); + unsafe { + bindings::gecko_profiler_clone_timestamp(&self.0, dest.as_mut_ptr()); + ProfilerTime(dest.assume_init()) + } + } +} + +impl Drop for ProfilerTime { + fn drop(&mut self) { + unsafe { + bindings::gecko_profiler_destruct_timestamp(&mut self.0); + } + } +} diff --git a/tools/profiler/rust-helper/Cargo.toml b/tools/profiler/rust-helper/Cargo.toml new file mode 100644 index 0000000000..6d3d168ed4 --- /dev/null +++ b/tools/profiler/rust-helper/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "profiler_helper" +version = "0.1.0" +authors = ["Markus Stange "] +license = "MPL-2.0" + +[dependencies] +memmap2 = "0.5" +rustc-demangle = "0.1" +uuid = "1.0" + +[dependencies.object] +version = "0.30" +optional = true +default-features = false +features = ["std", "read_core", "elf"] + +[dependencies.thin-vec] +version = "0.2.1" +features = ["gecko-ffi"] + 
+[features] +parse_elf = ["object"] diff --git a/tools/profiler/rust-helper/src/compact_symbol_table.rs b/tools/profiler/rust-helper/src/compact_symbol_table.rs new file mode 100644 index 0000000000..12c4ca081b --- /dev/null +++ b/tools/profiler/rust-helper/src/compact_symbol_table.rs @@ -0,0 +1,40 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use std::collections::HashMap; +use thin_vec::ThinVec; + +#[repr(C)] +pub struct CompactSymbolTable { + pub addr: ThinVec, + pub index: ThinVec, + pub buffer: ThinVec, +} + +impl CompactSymbolTable { + pub fn new() -> Self { + Self { + addr: ThinVec::new(), + index: ThinVec::new(), + buffer: ThinVec::new(), + } + } + + pub fn from_map(map: HashMap) -> Self { + let mut table = Self::new(); + let mut entries: Vec<_> = map.into_iter().collect(); + entries.sort_by_key(|&(addr, _)| addr); + for (addr, name) in entries { + table.addr.push(addr); + table.index.push(table.buffer.len() as u32); + table.add_name(name); + } + table.index.push(table.buffer.len() as u32); + table + } + + fn add_name(&mut self, name: &str) { + self.buffer.extend_from_slice(name.as_bytes()); + } +} diff --git a/tools/profiler/rust-helper/src/elf.rs b/tools/profiler/rust-helper/src/elf.rs new file mode 100644 index 0000000000..4930884f05 --- /dev/null +++ b/tools/profiler/rust-helper/src/elf.rs @@ -0,0 +1,101 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use compact_symbol_table::CompactSymbolTable; +use object::read::{NativeFile, Object}; +use object::{ObjectSection, ObjectSymbol, SectionKind, SymbolKind}; +use std::cmp; +use std::collections::HashMap; +use uuid::Uuid; + +const UUID_SIZE: usize = 16; +const PAGE_SIZE: usize = 4096; + +fn get_symbol_map<'a: 'b, 'b, T>(object_file: &'b T) -> HashMap +where + T: Object<'a, 'b>, +{ + object_file + .dynamic_symbols() + .chain(object_file.symbols()) + .filter(|symbol| symbol.kind() == SymbolKind::Text) + .filter_map(|symbol| { + symbol + .name() + .map(|name| (symbol.address() as u32, name)) + .ok() + }) + .collect() +} + +pub fn get_compact_symbol_table( + buffer: &[u8], + breakpad_id: Option<&str>, +) -> Option { + let elf_file = NativeFile::parse(buffer).ok()?; + let elf_id = get_elf_id(&elf_file)?; + if !breakpad_id.map_or(true, |id| id == format!("{:X}0", elf_id.as_simple())) { + return None; + } + return Some(CompactSymbolTable::from_map(get_symbol_map(&elf_file))); +} + +fn create_elf_id(identifier: &[u8], little_endian: bool) -> Uuid { + // Make sure that we have exactly UUID_SIZE bytes available + let mut data = [0 as u8; UUID_SIZE]; + let len = cmp::min(identifier.len(), UUID_SIZE); + data[0..len].copy_from_slice(&identifier[0..len]); + + if little_endian { + // The ELF file targets a little endian architecture. Convert to + // network byte order (big endian) to match the Breakpad processor's + // expectations. For big endian object files, this is not needed. + data[0..4].reverse(); // uuid field 1 + data[4..6].reverse(); // uuid field 2 + data[6..8].reverse(); // uuid field 3 + } + + Uuid::from_bytes(data) +} + +/// Tries to obtain the object identifier of an ELF object. +/// +/// As opposed to Mach-O, ELF does not specify a unique ID for object files in +/// its header. Compilers and linkers usually add either `SHT_NOTE` sections or +/// `PT_NOTE` program header elements for this purpose.
If one of these notes +/// is present, NativeFile's build_id() method will find it. +/// +/// If neither of the above is present, this function will hash the first page +/// of the `.text` section (program code). This matches what the Breakpad +/// processor does. +/// +/// If all of the above fails, this function will return `None`. +pub fn get_elf_id(elf_file: &NativeFile) -> Option<Uuid> { + if let Ok(Some(identifier)) = elf_file.build_id() { + return Some(create_elf_id(identifier, elf_file.is_little_endian())); + } + + // We were not able to locate the build ID, so fall back to hashing the + // first page of the ".text" (program code) section. This algorithm XORs + // 16-byte chunks directly into a UUID buffer. + if let Some(section_data) = find_text_section(elf_file) { + let mut hash = [0; UUID_SIZE]; + for i in 0..cmp::min(section_data.len(), PAGE_SIZE) { + hash[i % UUID_SIZE] ^= section_data[i]; + } + return Some(create_elf_id(&hash, elf_file.is_little_endian())); + } + + None +} + +/// Returns a reference to the data of the .text section in an ELF binary. +fn find_text_section<'elf>(elf_file: &'elf NativeFile) -> Option<&'elf [u8]> { + if let Some(section) = elf_file.section_by_name(".text") { + if section.kind() == SectionKind::Text { + return section.data().ok(); + } + } + None +} diff --git a/tools/profiler/rust-helper/src/lib.rs b/tools/profiler/rust-helper/src/lib.rs new file mode 100644 index 0000000000..22f8e04a2e --- /dev/null +++ b/tools/profiler/rust-helper/src/lib.rs @@ -0,0 +1,107 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +extern crate memmap2; +extern crate rustc_demangle; +extern crate thin_vec; +extern crate uuid; + +#[cfg(feature = "parse_elf")] +extern crate object; + +mod compact_symbol_table; + +#[cfg(feature = "parse_elf")] +mod elf; + +#[cfg(feature = "parse_elf")] +use memmap2::MmapOptions; +#[cfg(feature = "parse_elf")] +use std::fs::File; + +use compact_symbol_table::CompactSymbolTable; +use rustc_demangle::try_demangle; +use std::ffi::CStr; +use std::mem; +use std::os::raw::c_char; +use std::ptr; + +#[cfg(feature = "parse_elf")] +pub fn get_compact_symbol_table_from_file( + debug_path: &str, + breakpad_id: Option<&str>, +) -> Option<CompactSymbolTable> { + let file = File::open(debug_path).ok()?; + let buffer = unsafe { MmapOptions::new().map(&file).ok()? }; + elf::get_compact_symbol_table(&buffer, breakpad_id) +} + +#[cfg(not(feature = "parse_elf"))] +pub fn get_compact_symbol_table_from_file( + _debug_path: &str, + _breakpad_id: Option<&str>, +) -> Option<CompactSymbolTable> { + None +} + +#[no_mangle] +pub unsafe extern "C" fn profiler_get_symbol_table( + debug_path: *const c_char, + breakpad_id: *const c_char, + symbol_table: &mut CompactSymbolTable, +) -> bool { + let debug_path = CStr::from_ptr(debug_path).to_string_lossy(); + let breakpad_id = if breakpad_id.is_null() { + None + } else { + match CStr::from_ptr(breakpad_id).to_str() { + Ok(s) => Some(s), + Err(_) => return false, + } + }; + + match get_compact_symbol_table_from_file(&debug_path, breakpad_id) { + Some(mut st) => { + std::mem::swap(symbol_table, &mut st); + true + } + None => false, + } +} + +#[no_mangle] +pub unsafe extern "C" fn profiler_demangle_rust( + mangled: *const c_char, + buffer: *mut c_char, + buffer_len: usize, +) -> bool { + assert!(!mangled.is_null()); + assert!(!buffer.is_null()); + + if buffer_len == 0 { + return false; + } + + let buffer: *mut u8 = mem::transmute(buffer); + let mangled = match CStr::from_ptr(mangled).to_str() { + Ok(s) => s, + Err(_) => return false, + }; + + match
try_demangle(mangled) { + Ok(demangled) => { + let mut demangled = format!("{:#}", demangled); + if !demangled.is_ascii() { + return false; + } + demangled.truncate(buffer_len - 1); + + let bytes = demangled.as_bytes(); + ptr::copy(bytes.as_ptr(), buffer, bytes.len()); + ptr::write(buffer.offset(bytes.len() as isize), 0); + true + } + Err(_) => false, + } +} diff --git a/tools/profiler/tests/browser/browser.ini b/tools/profiler/tests/browser/browser.ini new file mode 100644 index 0000000000..654446e36e --- /dev/null +++ b/tools/profiler/tests/browser/browser.ini @@ -0,0 +1,102 @@ +[DEFAULT] +skip-if = tsan # Bug 1804081 - TSan times out on pretty much all of these tests +support-files = + ../shared-head.js + head.js + +[browser_test_feature_ipcmessages.js] +support-files = simple.html + +[browser_test_feature_jsallocations.js] +support-files = do_work_500ms.html + +[browser_test_feature_nostacksampling.js] +support-files = do_work_500ms.html + +[browser_test_markers_parent_process.js] +skip-if = + os == "win" && os_version == "6.1" # Skip on Azure - frequent failure + +[browser_test_markers_preferencereads.js] +support-files = single_frame.html + +[browser_test_markers_gc_cc.js] + +[browser_test_profile_capture_by_pid.js] +skip-if = os == "win" && os_version == "6.1" # No thread names on win7, needed for these tests +https_first_disabled = true +support-files = single_frame.html + +[browser_test_profile_fission.js] +support-files = single_frame.html + +[browser_test_profile_single_frame_page_info.js] +https_first_disabled = true +support-files = single_frame.html + +[browser_test_profile_slow_capture.js] +https_first_disabled = true +support-files = single_frame.html +skip-if = !debug + +[browser_test_profile_multi_frame_page_info.js] +https_first_disabled = true +support-files = + multi_frame.html + single_frame.html + +[browser_test_marker_network_simple.js] +https_first_disabled = true +support-files = simple.html + 
+[browser_test_marker_network_private_browsing.js] +support-files = simple.html + +[browser_test_marker_network_cancel.js] +https_first_disabled = true +support-files = simple.html + +[browser_test_marker_network_sts.js] +support-files = simple.html + +[browser_test_marker_network_redirect.js] +https_first_disabled = true +support-files = + redirect.sjs + simple.html + page_with_resources.html + firefox-logo-nightly.svg +skip-if = + os == "win" && os_version == "6.1" # Skip on Azure - frequent failure + +[browser_test_marker_network_serviceworker_cache_first.js] +support-files = + serviceworkers/serviceworker-utils.js + serviceworkers/serviceworker_register.html + serviceworkers/serviceworker_page.html + serviceworkers/firefox-logo-nightly.svg + serviceworkers/serviceworker_cache_first.js + +[browser_test_marker_network_serviceworker_no_fetch_handler.js] +support-files = + serviceworkers/serviceworker-utils.js + serviceworkers/serviceworker_register.html + serviceworkers/serviceworker_page.html + serviceworkers/firefox-logo-nightly.svg + serviceworkers/serviceworker_no_fetch_handler.js + +[browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js] +support-files = + serviceworkers/serviceworker-utils.js + serviceworkers/serviceworker_register.html + serviceworkers/serviceworker_page.html + serviceworkers/firefox-logo-nightly.svg + serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js + +[browser_test_marker_network_serviceworker_synthetized_response.js] +support-files = + serviceworkers/serviceworker-utils.js + serviceworkers/serviceworker_register.html + serviceworkers/serviceworker_simple.html + serviceworkers/firefox-logo-nightly.svg + serviceworkers/serviceworker_synthetized_response.js diff --git a/tools/profiler/tests/browser/browser_test_feature_ipcmessages.js b/tools/profiler/tests/browser/browser_test_feature_ipcmessages.js new file mode 100644 index 0000000000..f5fb2921a1 --- /dev/null +++ 
b/tools/profiler/tests/browser/browser_test_feature_ipcmessages.js @@ -0,0 +1,100 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +requestLongerTimeout(10); + +async function waitForLoad() { + return SpecialPowers.spawn(gBrowser.selectedBrowser, [], () => { + return new Promise(function (resolve) { + if (content.document.readyState !== "complete") { + content.document.addEventListener("readystatechange", () => { + if (content.document.readyState === "complete") { + resolve(); + } + }); + } else { + resolve(); + } + }); + }); +} + +/** + * Test the IPCMessages feature. + */ +add_task(async function test_profile_feature_ipcmessges() { + const url = BASE_URL + "simple.html"; + + info("Open a tab while profiling IPC messages."); + await startProfiler({ features: ["js", "ipcmessages"] }); + info("Started the profiler sucessfully! Now, let's open a tab."); + + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + info("We opened a tab!"); + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + info("Now let's wait until it's fully loaded."); + await waitForLoad(); + + info( + "Check that some IPC profile markers were generated when " + + "the feature is enabled." + ); + { + const { parentThread, contentThread } = + await waitSamplingAndStopProfilerAndGetThreads(contentPid); + + Assert.greater( + getPayloadsOfType(parentThread, "IPC").length, + 0, + "IPC profile markers were recorded for the parent process' main " + + "thread when the IPCMessages feature was turned on." + ); + + Assert.greater( + getPayloadsOfType(contentThread, "IPC").length, + 0, + "IPC profile markers were recorded for the content process' main " + + "thread when the IPCMessages feature was turned on." 
+ ); + } + }); + + info("Now open a tab without profiling IPC messages."); + await startProfiler({ features: ["js"] }); + + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + await waitForLoad(); + + info( + "Check that no IPC profile markers were recorded when the " + + "feature is turned off." + ); + { + const { parentThread, contentThread } = + await waitSamplingAndStopProfilerAndGetThreads(contentPid); + Assert.equal( + getPayloadsOfType(parentThread, "IPC").length, + 0, + "No IPC profile markers were recorded for the parent process' main " + + "thread when the IPCMessages feature was turned off." + ); + + Assert.equal( + getPayloadsOfType(contentThread, "IPC").length, + 0, + "No IPC profile markers were recorded for the content process' main " + + "thread when the IPCMessages feature was turned off." + ); + } + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_feature_jsallocations.js b/tools/profiler/tests/browser/browser_test_feature_jsallocations.js new file mode 100644 index 0000000000..60d072bed9 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_feature_jsallocations.js @@ -0,0 +1,74 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +requestLongerTimeout(10); + +/** + * Test the JS Allocations feature. This is done as a browser test to ensure that + * we realistically try out how the JS allocations are running. This ensures that + * we are collecting allocations for the content process and the parent process. 
+ */ +add_task(async function test_profile_feature_jsallocations() { + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + await startProfiler({ features: ["js", "jsallocations"] }); + + const url = BASE_URL + "do_work_500ms.html"; + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + // Wait 500ms so that the tab finishes executing. + await wait(500); + + // Check that we can get some allocations when the feature is turned on. + { + const { parentThread, contentThread } = + await waitSamplingAndStopProfilerAndGetThreads(contentPid); + Assert.greater( + getPayloadsOfType(parentThread, "JS allocation").length, + 0, + "Allocations were recorded for the parent process' main thread when the " + + "JS Allocation feature was turned on." + ); + Assert.greater( + getPayloadsOfType(contentThread, "JS allocation").length, + 0, + "Allocations were recorded for the content process' main thread when the " + + "JS Allocation feature was turned on." + ); + } + + await startProfiler({ features: ["js"] }); + // Now reload the tab with a clean run. + gBrowser.reload(); + await wait(500); + + // Check that no allocations were recorded, and allocation tracking was correctly + // turned off. + { + const { parentThread, contentThread } = + await waitSamplingAndStopProfilerAndGetThreads(contentPid); + Assert.equal( + getPayloadsOfType(parentThread, "JS allocation").length, + 0, + "No allocations were recorded for the parent processes' main thread when " + + "JS allocation was not turned on." + ); + + Assert.equal( + getPayloadsOfType(contentThread, "JS allocation").length, + 0, + "No allocations were recorded for the content processes' main thread when " + + "JS allocation was not turned on." 
+ ); + } + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_feature_nostacksampling.js b/tools/profiler/tests/browser/browser_test_feature_nostacksampling.js new file mode 100644 index 0000000000..323a87e191 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_feature_nostacksampling.js @@ -0,0 +1,72 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test the No Stack Sampling feature. + */ +add_task(async function test_profile_feature_nostacksampling() { + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + await startProfiler({ features: ["js", "nostacksampling"] }); + + const url = BASE_URL + "do_work_500ms.html"; + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + // Wait 500ms so that the tab finishes executing. + await wait(500); + + // Check that we can get no stacks when the feature is turned on. + { + const { parentThread, contentThread } = + await stopProfilerNowAndGetThreads(contentPid); + Assert.equal( + parentThread.samples.data.length, + 0, + "Stack samples were recorded from the parent process' main thread" + + "when the No Stack Sampling feature was turned on." + ); + Assert.equal( + contentThread.samples.data.length, + 0, + "Stack samples were recorded from the content process' main thread" + + "when the No Stack Sampling feature was turned on." + ); + } + + // Flush out any straggling allocation markers that may have not been collected + // yet by starting and stopping the profiler once. + await startProfiler({ features: ["js"] }); + + // Now reload the tab with a clean run. + gBrowser.reload(); + await wait(500); + + // Check that stack samples were recorded. 
+ { + const { parentThread, contentThread } = + await waitSamplingAndStopProfilerAndGetThreads(contentPid); + Assert.greater( + parentThread.samples.data.length, + 0, + "No Stack samples were recorded from the parent process' main thread" + + "when the No Stack Sampling feature was not turned on." + ); + + Assert.greater( + contentThread.samples.data.length, + 0, + "No Stack samples were recorded from the content process' main thread" + + "when the No Stack Sampling feature was not turned on." + ); + } + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_marker_network_cancel.js b/tools/profiler/tests/browser/browser_test_marker_network_cancel.js new file mode 100644 index 0000000000..0a850487af --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_marker_network_cancel.js @@ -0,0 +1,71 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we emit network markers with the cancel status. 
+ */ +add_task(async function test_network_markers_early_cancel() { + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const url = BASE_URL + "simple.html?cacheBust=" + Math.random(); + const options = { + gBrowser, + url: "about:blank", + waitForLoad: false, + }; + + const tab = await BrowserTestUtils.openNewForegroundTab(options); + const loadPromise = BrowserTestUtils.waitForDocLoadAndStopIt(url, tab); + BrowserTestUtils.loadURIString(tab.linkedBrowser, url); + const contentPid = await SpecialPowers.spawn( + tab.linkedBrowser, + [], + () => Services.appinfo.processID + ); + await loadPromise; + const { parentThread, contentThread } = await stopProfilerNowAndGetThreads( + contentPid + ); + BrowserTestUtils.removeTab(tab); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread); + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + + info("parent process: " + JSON.stringify(parentNetworkMarkers, null, 2)); + info("content process: " + JSON.stringify(contentNetworkMarkers, null, 2)); + + Assert.equal( + parentNetworkMarkers.length, + 2, + `We should get a pair of network markers in the parent thread.` + ); + + // We don't test the markers in the content process, because depending on some + // timing we can have 0 or 1 (and maybe even 2 (?)). 
+ + const parentStopMarker = parentNetworkMarkers[1]; + + const expectedProperties = { + name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_CANCEL", + URI: url, + requestMethod: "GET", + contentType: null, + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + cache: "Unresolved", + }), + }; + + Assert.objectContains(parentStopMarker, expectedProperties); +}); diff --git a/tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js b/tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js new file mode 100644 index 0000000000..f898ebda29 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js @@ -0,0 +1,91 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/** + * Test that we emit network markers accordingly + */ +add_task(async function test_network_markers() { + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const win = await BrowserTestUtils.openNewBrowserWindow({ + private: true, + fission: true, + }); + try { + const url = BASE_URL_HTTPS + "simple.html?cacheBust=" + Math.random(); + const contentBrowser = win.gBrowser.selectedBrowser; + BrowserTestUtils.loadURIString(contentBrowser, url); + await BrowserTestUtils.browserLoaded(contentBrowser, false, url); + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + const { parentThread, contentThread } = await stopProfilerNowAndGetThreads( + contentPid + ); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread); + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + info(JSON.stringify(parentNetworkMarkers, null, 2)); + info(JSON.stringify(contentNetworkMarkers, null, 2)); + + Assert.equal( + parentNetworkMarkers.length, + 2, + `We should get a pair of network markers in the parent thread.` + ); + Assert.equal( + contentNetworkMarkers.length, + 2, + `We should get a pair of network markers in the content thread.` + ); + + const parentStopMarker = parentNetworkMarkers[1]; + const contentStopMarker = contentNetworkMarkers[1]; + + const expectedProperties = { + name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`), + data: Expect.objectContains({ + status: "STATUS_STOP", + URI: url, + requestMethod: "GET", + contentType: "text/html", + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: 
Expect.number(), + count: Expect.number(), + pri: Expect.number(), + isPrivateBrowsing: true, + }), + }; + + Assert.objectContains(parentStopMarker, expectedProperties); + // The cache information is missing from the content marker, it's only part + // of the parent marker. See Bug 1544821. + Assert.objectContains(parentStopMarker.data, { + // Because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + Assert.objectContains(contentStopMarker, expectedProperties); + } finally { + await BrowserTestUtils.closeWindow(win); + } +}); diff --git a/tools/profiler/tests/browser/browser_test_marker_network_redirect.js b/tools/profiler/tests/browser/browser_test_marker_network_redirect.js new file mode 100644 index 0000000000..28478c2b3b --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_marker_network_redirect.js @@ -0,0 +1,341 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we emit network markers accordingly. + * In this file we'll test the redirect cases. + */ +add_task(async function test_network_markers_service_worker_setup() { + // Disabling cache makes the result more predictable especially in verify mode. + await SpecialPowers.pushPrefEnv({ + set: [ + ["browser.cache.disk.enable", false], + ["browser.cache.memory.enable", false], + ], + }); +}); + +add_task(async function test_network_markers_redirect_simple() { + // In this test, we request an HTML page that gets redirected. This is a + // top-level navigation. 
+ Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const targetFileNameWithCacheBust = "simple.html"; + const url = + BASE_URL + + "redirect.sjs?" + + encodeURIComponent(targetFileNameWithCacheBust); + const targetUrl = BASE_URL + targetFileNameWithCacheBust; + + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + const { parentThread, contentThread } = await stopProfilerNowAndGetThreads( + contentPid + ); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread); + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + info(JSON.stringify(parentNetworkMarkers, null, 2)); + info(JSON.stringify(contentNetworkMarkers, null, 2)); + + Assert.equal( + parentNetworkMarkers.length, + 4, + `We should get 2 pairs of network markers in the parent thread.` + ); + + /* It looks like that for a redirection for the top level navigation, the + * content thread sees the markers for the second request only. + * See Bug 1692879. */ + Assert.equal( + contentNetworkMarkers.length, + 2, + `We should get one pair of network markers in the content thread.` + ); + + const parentRedirectMarker = parentNetworkMarkers[1]; + const parentStopMarker = parentNetworkMarkers[3]; + // There's no content redirect marker for the reason outlined above. 
+ const contentStopMarker = contentNetworkMarkers[1]; + + Assert.objectContains(parentRedirectMarker, { + name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_REDIRECT", + URI: url, + RedirectURI: targetUrl, + requestMethod: "GET", + contentType: null, + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: Expect.number(), + redirectId: parentStopMarker.data.id, + pri: Expect.number(), + cache: Expect.stringMatches(/Missed|Unresolved/), + redirectType: "Permanent", + isHttpToHttpsRedirect: false, + }), + }); + + const expectedProperties = { + name: Expect.stringMatches( + `Load \\d+:.*${escapeStringRegexp(targetUrl)}` + ), + }; + const expectedDataProperties = { + type: "Network", + status: "STATUS_STOP", + URI: targetUrl, + requestMethod: "GET", + contentType: "text/html", + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: Expect.number(), + count: Expect.number(), + pri: Expect.number(), + }; + + Assert.objectContains(parentStopMarker, expectedProperties); + Assert.objectContains(contentStopMarker, expectedProperties); + + // The cache information is missing from the content marker, it's only part + // of the parent marker. See Bug 1544821. 
+ Assert.objectContainsOnly(parentStopMarker.data, { + ...expectedDataProperties, + // Because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + Assert.objectContainsOnly(contentStopMarker.data, expectedDataProperties); + }); +}); + +add_task(async function test_network_markers_redirect_resources() { + // In this test we request an HTML file that itself contains resources that + // are redirected. + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const url = BASE_URL + "page_with_resources.html?cacheBust=" + Math.random(); + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + const { parentThread, contentThread } = await stopProfilerNowAndGetThreads( + contentPid + ); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread); + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + info(JSON.stringify(parentNetworkMarkers, null, 2)); + info(JSON.stringify(contentNetworkMarkers, null, 2)); + + Assert.equal( + parentNetworkMarkers.length, + 8, + `We should get 4 pairs of network markers in the parent thread.` + // 1 - The main page + // 2 - The SVG + // 3 - The redirected request for the second SVG request. + // 4 - The SVG, again + ); + + /* In this second test, the top level navigation request isn't redirected. + * Contrary to Bug 1692879 we get all network markers for redirected + * resources. 
*/ + Assert.equal( + contentNetworkMarkers.length, + 8, + `We should get 4 pairs of network markers in the content thread.` + ); + + // The same resource firefox-logo-nightly.svg is requested twice, but the + // second time it is redirected. + // We're not interested in the main page, as we test that in other files. + // In this page we're only interested in the marker for requested resources. + + const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers); + const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers); + + // First, make sure we properly matched all start with stop markers. This + // means that both arrays should contain only arrays of 2 elements. + parentPairs.forEach(pair => + Assert.equal( + pair.length, + 2, + `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.` + ) + ); + contentPairs.forEach(pair => + Assert.equal( + pair.length, + 2, + `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.` + ) + ); + + const parentFirstStopMarker = parentPairs[1][1]; + const parentRedirectMarker = parentPairs[2][1]; + const parentSecondStopMarker = parentPairs[3][1]; + const contentFirstStopMarker = contentPairs[1][1]; + const contentRedirectMarker = contentPairs[2][1]; + const contentSecondStopMarker = contentPairs[3][1]; + + const expectedCommonDataProperties = { + type: "Network", + requestMethod: "GET", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + innerWindowID: Expect.number(), + }; + + // These properties are present when a connection is fully opened. This is + // most often the case, unless we're in verify mode, because in that case + // we run the same tests several times in the same Firefox and they might be + // cached, or in chaos mode Firefox may make all requests sequentially on + // the same connection. + // In these cases, these properties won't always be present. 
+ const expectedConnectionProperties = { + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + }; + + const expectedPropertiesForStopMarker = { + name: Expect.stringMatches(/Load \d+:.*\/firefox-logo-nightly\.svg/), + }; + + const expectedDataPropertiesForStopMarker = { + ...expectedCommonDataProperties, + ...expectedConnectionProperties, + status: "STATUS_STOP", + URI: Expect.stringContains("/firefox-logo-nightly.svg"), + contentType: "image/svg+xml", + count: Expect.number(), + }; + + const expectedPropertiesForRedirectMarker = { + name: Expect.stringMatches( + /Load \d+:.*\/redirect.sjs\?firefox-logo-nightly\.svg/ + ), + }; + + const expectedDataPropertiesForRedirectMarker = { + ...expectedCommonDataProperties, + ...expectedConnectionProperties, + status: "STATUS_REDIRECT", + URI: Expect.stringContains("/redirect.sjs?firefox-logo-nightly.svg"), + RedirectURI: Expect.stringContains("/firefox-logo-nightly.svg"), + contentType: null, + redirectType: "Permanent", + isHttpToHttpsRedirect: false, + }; + + Assert.objectContains( + parentFirstStopMarker, + expectedPropertiesForStopMarker + ); + Assert.objectContainsOnly(parentFirstStopMarker.data, { + ...expectedDataPropertiesForStopMarker, + // The cache information is missing from the content marker, it's only part + // of the parent marker. See Bug 1544821. + // Also, because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. 
+ cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + + Assert.objectContains( + contentFirstStopMarker, + expectedPropertiesForStopMarker + ); + Assert.objectContainsOnly( + contentFirstStopMarker.data, + expectedDataPropertiesForStopMarker + ); + + Assert.objectContains( + parentRedirectMarker, + expectedPropertiesForRedirectMarker + ); + Assert.objectContainsOnly(parentRedirectMarker.data, { + ...expectedDataPropertiesForRedirectMarker, + redirectId: parentSecondStopMarker.data.id, + // See above for the full explanation about the cache property. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + + Assert.objectContains( + contentRedirectMarker, + expectedPropertiesForRedirectMarker + ); + Assert.objectContainsOnly(contentRedirectMarker.data, { + ...expectedDataPropertiesForRedirectMarker, + redirectId: contentSecondStopMarker.data.id, + }); + + Assert.objectContains( + parentSecondStopMarker, + expectedPropertiesForStopMarker + ); + Assert.objectContainsOnly(parentSecondStopMarker.data, { + ...expectedDataPropertiesForStopMarker, + // "count" is already required by expectedDataPropertiesForStopMarker; it is repeated here explicitly. + count: Expect.number(), + // See above for the full explanation about the cache property. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + + Assert.objectContains( + contentSecondStopMarker, + expectedPropertiesForStopMarker + ); + Assert.objectContainsOnly( + contentSecondStopMarker.data, + expectedDataPropertiesForStopMarker + ); + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js new file mode 100644 index 0000000000..c1ad49b262 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js @@ -0,0 +1,378 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test a caching service worker. This service worker will
+ * fetch and store requests at install time, and serve them when the page
+ * requests them.
+ */
+
+const serviceWorkerFileName = "serviceworker_cache_first.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Disabling cache makes the result more predictable. Also this makes things
+  // simpler when dealing with service workers.
+  await SpecialPowers.pushPrefEnv({
+    set: [
+      ["browser.cache.disk.enable", false],
+      ["browser.cache.memory.enable", false],
+    ],
+  });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // In this first step, we request an HTML page that will register a service
+  // worker. We'll wait until the service worker is fully installed before
+  // checking various things.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    await SpecialPowers.spawn(
+      contentBrowser,
+      [serviceWorkerFileName],
+      async function (serviceWorkerFileName) {
+        await content.wrappedJSObject.registerServiceWorkerAndWait(
+          serviceWorkerFileName
+        );
+      }
+    );
+
+    const { parentThread, contentThread, profile } =
+      await stopProfilerNowAndGetThreads(contentPid);
+
+    // The service worker work happens in a third "thread" or process, let's try
+    // to find it.
+    // Currently the fetches happen on the main thread for the content process,
+    // this may change in the future and we may have to adapt this function.
+    // Also please note this isn't necessarily the same content process as the
+    // one for the tab.
+    const { serviceWorkerParentThread } = findServiceWorkerThreads(profile);
+
+    // Here are a few sanity checks.
+    ok(
+      serviceWorkerParentThread,
+      "We should find a thread for the service worker."
+    );
+
+    Assert.notEqual(
+      serviceWorkerParentThread.pid,
+      parentThread.pid,
+      "We should have a different pid than the parent thread."
+    );
+    Assert.notEqual(
+      serviceWorkerParentThread.tid,
+      parentThread.tid,
+      "We should have a different tid than the parent thread."
+    );
+
+    // Let's make sure we actually have a registered service worker.
+    const workers = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      workers.length,
+      1,
+      "One service worker should be properly registered."
+    );
+
+    // By logging some information about the threads we make debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+    logInformationForThread(
+      "serviceWorkerParentThread information",
+      serviceWorkerParentThread
+    );
+
+    // Now let's check the marker payloads.
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers about it that we're not
+      // interested in for this part of the test. They're only present in the
+      // parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    const serviceWorkerNetworkMarkers = getInflatedNetworkMarkers(
+      serviceWorkerParentThread
+    );
+
+    // Some more logs for debugging purposes.
+    info(
+      "Parent network markers: " + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers: " +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+    info(
+      "Serviceworker network markers: " +
+        JSON.stringify(serviceWorkerNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+    const serviceWorkerPairs = getPairsOfNetworkMarkers(
+      serviceWorkerNetworkMarkers
+    );
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that all three arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+    serviceWorkerPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the service worker process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    const parentStopMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+
+    // These are the files cached by the service worker. We should see markers
+    // for both the parent thread and the service worker thread.
+    const expectedFiles = [
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    for (const expectedFile of expectedFiles) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers of the parent process.`
+      );
+      const parentMarker = parentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const expectedProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+        data: Expect.objectContains({
+          status: "STATUS_STOP",
+          URI: expectedFile,
+          requestMethod: "GET",
+          contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          domainLookupStart: Expect.number(),
+          domainLookupEnd: Expect.number(),
+          connectStart: Expect.number(),
+          tcpConnectEnd: Expect.number(),
+          connectEnd: Expect.number(),
+          requestStart: Expect.number(),
+          responseStart: Expect.number(),
+          responseEnd: Expect.number(),
+          id: Expect.number(),
+          count: Expect.number(),
+          pri: Expect.number(),
+        }),
+      };
+
+      Assert.objectContains(parentMarker, expectedProperties);
+    }
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  // In this test we request the HTML page that the service worker cached in
+  // the previous task, so that the service worker serves it and its resources.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_page.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    // By logging some information about the threads we make debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers about it that we're not
+      // interested in for this part of the test. They're only present in the
+      // parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+    // Here are some logs to ease debugging.
+    info(
+      "Parent network markers: " + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers: " +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // These are the files cached by the service worker. We should see markers
+    // for the parent thread and the content thread.
+    const expectedFiles = [
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    const parentEndMarkers = parentPairs.map(([_, endMarker]) => endMarker);
+    const contentStopMarkers = contentPairs.map(
+      ([_, stopMarker]) => stopMarker
+    );
+
+    Assert.equal(
+      parentEndMarkers.length,
+      expectedFiles.length * 2, // one redirect + one stop
+      "There should be twice as many end markers in the parent process as requested files."
+    );
+    Assert.equal(
+      contentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the content process as requested files."
+    );
+
+    for (const [i, expectedFile] of expectedFiles.entries()) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const [parentRedirectMarker, parentStopMarker] = parentEndMarkers.filter(
+        marker => marker.data.URI === expectedFile
+      );
+      const contentMarker = contentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const commonDataProperties = {
+        type: "Network",
+        URI: expectedFile,
+        requestMethod: "GET",
+        contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+      };
+
+      const expectedProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+      };
+
+      Assert.objectContains(parentRedirectMarker, expectedProperties);
+      Assert.objectContains(parentStopMarker, expectedProperties);
+      Assert.objectContains(contentMarker, expectedProperties);
+      if (i === 0) {
+        // This is the top level navigation, the HTML file.
+        Assert.objectContainsOnly(parentRedirectMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_REDIRECT",
+          contentType: null,
+          cache: "Unresolved",
+          RedirectURI: expectedFile,
+          redirectType: "Internal",
+          redirectId: parentStopMarker.data.id,
+          isHttpToHttpsRedirect: false,
+        });
+
+        Assert.objectContainsOnly(parentStopMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_STOP",
+        });
+
+        Assert.objectContainsOnly(contentMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_STOP",
+        });
+      } else {
+        Assert.objectContainsOnly(parentRedirectMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_REDIRECT",
+          contentType: null,
+          cache: "Unresolved",
+          innerWindowID: Expect.number(),
+          RedirectURI: expectedFile,
+          redirectType: "Internal",
+          redirectId: parentStopMarker.data.id,
+          isHttpToHttpsRedirect: false,
+        });
+
+        Assert.objectContainsOnly(
+          parentStopMarker.data,
+          // Note: in the future we may have more properties. We're using the
+          // "Only" flavor of the matcher so that we don't forget to update this
+          // test when this changes.
+          {
+            ...commonDataProperties,
+            innerWindowID: Expect.number(),
+            status: "STATUS_STOP",
+          }
+        );
+
+        Assert.objectContainsOnly(contentMarker.data, {
+          ...commonDataProperties,
+          innerWindowID: Expect.number(),
+          status: "STATUS_STOP",
+        });
+      }
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js
new file mode 100644
index 0000000000..ad2cc81661
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js
@@ -0,0 +1,218 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test the case of a service worker that has no fetch
+ * handlers. In this case, a fetch is done to the network. There may be
+ * shortcuts in our code in this case, that's why it's important to test it
+ * separately.
+ */
+
+const serviceWorkerFileName = "serviceworker_no_fetch_handler.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Disabling cache makes the result more predictable. Also this makes things
+  // simpler when dealing with service workers.
+  await SpecialPowers.pushPrefEnv({
+    set: [
+      ["browser.cache.disk.enable", false],
+      ["browser.cache.memory.enable", false],
+    ],
+  });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // In this first step, we request an HTML page that will register a service
+  // worker. We'll wait until the service worker is fully installed before
+  // checking various things.
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    await SpecialPowers.spawn(
+      contentBrowser,
+      [serviceWorkerFileName],
+      async function (serviceWorkerFileName) {
+        await content.wrappedJSObject.registerServiceWorkerAndWait(
+          serviceWorkerFileName
+        );
+      }
+    );
+
+    // Let's make sure we actually have a registered service worker.
+    const workers = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      workers.length,
+      1,
+      "One service worker should be properly registered."
+    );
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_page.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    // By logging some information about the threads we make debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers about it that we're not
+      // interested in for this part of the test. They're only present in the
+      // parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+    // Here are some logs to ease debugging.
+    info(
+      "Parent network markers:" + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers:" +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    const parentStopMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+    const contentStopMarkers = contentPairs.map(
+      ([_, stopMarker]) => stopMarker
+    );
+
+    // These are the files requested by the page.
+    // We should see markers for the parent thread and the content thread.
+    const expectedFiles = [
+      // Please take care that the first element is the top level navigation, as
+      // this is special-cased below.
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    Assert.equal(
+      parentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the parent process as requested files."
+    );
+    Assert.equal(
+      contentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the content process as requested files."
+    );
+
+    for (const [i, expectedFile] of expectedFiles.entries()) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const parentMarker = parentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+      const contentMarker = contentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const commonProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+      };
+      Assert.objectContains(parentMarker, commonProperties);
+      Assert.objectContains(contentMarker, commonProperties);
+
+      // We get the full set of properties in this case, because we do an actual
+      // fetch to the network.
+      const commonDataProperties = {
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: expectedFile,
+        requestMethod: "GET",
+        contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        count: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+      };
+
+      if (i === 0) {
+        // The first marker is special cased: this is the top level navigation
+        // serviceworker_page.html,
+        // and in this case we don't have all the same properties. Especially
+        // the innerWindowID information is missing.
+        Assert.objectContainsOnly(parentMarker.data, {
+          ...commonDataProperties,
+          // Note that the parent process has the "cache" information, but not the content
+          // process. See Bug 1544821.
+          // Also because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+        });
+
+        Assert.objectContainsOnly(contentMarker.data, commonDataProperties);
+      } else {
+        // This is the other file firefox-logo-nightly.svg.
+        Assert.objectContainsOnly(parentMarker.data, {
+          ...commonDataProperties,
+          // Because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+          innerWindowID: Expect.number(),
+        });
+
+        Assert.objectContainsOnly(contentMarker.data, {
+          ...commonDataProperties,
+          innerWindowID: Expect.number(),
+        });
+      }
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js
new file mode 100644
index 0000000000..973ae61a7f
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js
@@ -0,0 +1,294 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test the case of a service worker that has a fetch
+ * handler, but no respondWith. In this case, some process called "reset
+ * interception" happens, and the fetch is still carried on by our code. Because
+ * this is a bit of an edge case, it's important to have a test for this case.
+ */
+
+const serviceWorkerFileName =
+  "serviceworker_no_respondWith_in_fetch_handler.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Disabling cache makes the result more predictable. Also this makes things
+  // simpler when dealing with service workers.
+  await SpecialPowers.pushPrefEnv({
+    set: [
+      ["browser.cache.disk.enable", false],
+      ["browser.cache.memory.enable", false],
+    ],
+  });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // In this first step, we request an HTML page that will register a service
+  // worker. We'll wait until the service worker is fully installed before
+  // checking various things.
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    await SpecialPowers.spawn(
+      contentBrowser,
+      [serviceWorkerFileName],
+      async function (serviceWorkerFileName) {
+        await content.wrappedJSObject.registerServiceWorkerAndWait(
+          serviceWorkerFileName
+        );
+      }
+    );
+
+    // Let's make sure we actually have a registered service worker.
+    const workers = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      workers.length,
+      1,
+      "One service worker should be properly registered."
+    );
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_page.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    // By logging some information about the threads we make debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers about it that we're not
+      // interested in for this part of the test. They're only present in the
+      // parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+    // Here are some logs to ease debugging.
+    info(
+      "Parent network markers:" + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers:" +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    // In this test, we should have redirect markers as well as stop markers,
+    // because this case generates internal redirects. We may want to change
+    // that in the future, or handle this specially in the frontend.
+    // Let's create various arrays to help assert.
+
+    const parentEndMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+    const parentStopMarkers = parentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_STOP"
+    );
+    const parentRedirectMarkers = parentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_REDIRECT"
+    );
+    const contentEndMarkers = contentPairs.map(([_, stopMarker]) => stopMarker);
+    const contentStopMarkers = contentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_STOP"
+    );
+    const contentRedirectMarkers = contentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_REDIRECT"
+    );
+
+    // These are the files requested by the page.
+    // We should see markers for the parent thread and the content thread.
+    const expectedFiles = [
+      // Please take care that the first element is the top level navigation, as
+      // this is special-cased below.
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    Assert.equal(
+      parentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the parent process as requested files."
+    );
+    Assert.equal(
+      parentRedirectMarkers.length,
+      expectedFiles.length * 2, // http -> intercepted, intercepted -> http
+      "There should be twice as many redirect markers in the parent process as requested files."
+    );
+    Assert.equal(
+      contentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the content process as requested files."
+    );
+    // Note: there will be no redirect markers in the content process for
+    // ServiceWorker requests that fall back to the network.
+    // See Bug 1793940.
+    Assert.equal(
+      contentRedirectMarkers.length,
+      0,
+      "There should be no redirect markers in the content process."
+    );
+
+    for (const [i, expectedFile] of expectedFiles.entries()) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const [parentRedirectMarkerIntercept, parentRedirectMarkerReset] =
+        parentRedirectMarkers.filter(
+          marker => marker.data.URI === expectedFile
+        );
+      const parentStopMarker = parentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+      const contentStopMarker = contentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const commonProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+      };
+      Assert.objectContains(parentRedirectMarkerIntercept, commonProperties);
+      Assert.objectContains(parentRedirectMarkerReset, commonProperties);
+      Assert.objectContains(parentStopMarker, commonProperties);
+      Assert.objectContains(contentStopMarker, commonProperties);
+      // Note: there's no check for the contentRedirectMarker, because there's
+      // no marker for a top level navigation redirect in the content process.
+
+      // We get the full set of properties in this case, because we do an actual
+      // fetch to the network.
+      const commonDataProperties = {
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: expectedFile,
+        requestMethod: "GET",
+        contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        count: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+      };
+
+      const commonRedirectProperties = {
+        type: "Network",
+        status: "STATUS_REDIRECT",
+        URI: expectedFile,
+        RedirectURI: expectedFile,
+        requestMethod: "GET",
+        contentType: null,
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        redirectType: "Internal",
+        isHttpToHttpsRedirect: false,
+      };
+
+      if (i === 0) {
+        // The first marker is special cased: this is the top level navigation
+        // serviceworker_page.html,
+        // and in this case we don't have all the same properties. Especially
+        // the innerWindowID information is missing.
+        Assert.objectContainsOnly(parentStopMarker.data, {
+          ...commonDataProperties,
+          // Note that the parent process has the "cache" information, but not the content
+          // process. See Bug 1544821.
+          // Also, because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+        });
+        Assert.objectContainsOnly(contentStopMarker.data, commonDataProperties);
+
+        Assert.objectContainsOnly(parentRedirectMarkerIntercept.data, {
+          ...commonRedirectProperties,
+          redirectId: parentRedirectMarkerReset.data.id,
+          cache: "Unresolved",
+        });
+        Assert.objectContainsOnly(parentRedirectMarkerReset.data, {
+          ...commonRedirectProperties,
+          redirectId: parentStopMarker.data.id,
+        });
+
+        // Note: there's no check for the contentRedirectMarker, because there's
+        // no marker for a top level navigation redirect in the content process.
+      } else {
+        // This is the other file firefox-logo-nightly.svg.
+        Assert.objectContainsOnly(parentStopMarker.data, {
+          ...commonDataProperties,
+          // Because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+          innerWindowID: Expect.number(),
+        });
+        Assert.objectContains(contentStopMarker, commonProperties);
+        Assert.objectContainsOnly(contentStopMarker.data, {
+          ...commonDataProperties,
+          innerWindowID: Expect.number(),
+        });
+
+        Assert.objectContainsOnly(parentRedirectMarkerIntercept.data, {
+          ...commonRedirectProperties,
+          innerWindowID: Expect.number(),
+          redirectId: parentRedirectMarkerReset.data.id,
+          cache: "Unresolved",
+        });
+        Assert.objectContainsOnly(parentRedirectMarkerReset.data, {
+          ...commonRedirectProperties,
+          innerWindowID: Expect.number(),
+          redirectId: parentStopMarker.data.id,
+        });
+      }
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js
new file mode 100644
index 0000000000..060592840a
--- /dev/null
+++
b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js @@ -0,0 +1,480 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we emit network markers accordingly. + * In this file we'll test a service worker that returns a synthetized response. + * This means the service worker will make up a response by itself. + */ + +const serviceWorkerFileName = "serviceworker_synthetized_response.js"; +registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData()); + +add_task(async function test_network_markers_service_worker_setup() { + // Disabling cache makes the result more predictable. Also this makes things + // simpler when dealing with service workers. + await SpecialPowers.pushPrefEnv({ + set: [ + ["browser.cache.disk.enable", false], + ["browser.cache.memory.enable", false], + ], + }); +}); + +add_task(async function test_network_markers_service_worker_register() { + // In this first step, we request an HTML page that will register a service + // worker. We'll wait until the service worker is fully installed before + // checking various things. + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`; + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + await SpecialPowers.spawn( + contentBrowser, + [serviceWorkerFileName], + async function (serviceWorkerFileName) { + await content.wrappedJSObject.registerServiceWorkerAndWait( + serviceWorkerFileName + ); + } + ); + + // Let's make sure we actually have a registered service workers. + const workers = await SpecialPowers.registeredServiceWorkers(); + Assert.equal( + workers.length, + 1, + "One service worker should be properly registered." 
+ ); + }); +}); + +add_task(async function test_network_markers_service_worker_use() { + // In this test, we'll first load a plain html file, then do some fetch + // requests in the context of the page. One request is served with a + // synthetized response, the other request is served with a real "fetch" done + // by the service worker. + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_simple.html`; + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + await SpecialPowers.spawn(contentBrowser, [], async () => { + // This request is served directly by the service worker as a synthetized response. + await content + .fetch("firefox-generated.svg") + .then(res => res.arrayBuffer()); + + // This request is served by a fetch done inside the service worker. + await content + .fetch("firefox-logo-nightly.svg") + .then(res => res.arrayBuffer()); + }); + + const { parentThread, contentThread, profile } = + await stopProfilerNowAndGetThreads(contentPid); + + // The service worker work happens in a third "thread" or process, let's try + // to find it. + // Currently the fetches happen on the main thread for the content process, + // this may change in the future and we may have to adapt this function. + // Also please note this isn't necessarily the same content process as the + // ones for the tab. + const { serviceWorkerParentThread } = findServiceWorkerThreads(profile); + + ok( + serviceWorkerParentThread, + "We should find a thread for the service worker." + ); + + // By logging a few information about the threads we make debugging easier. 
+ logInformationForThread("parentThread information", parentThread); + logInformationForThread("contentThread information", contentThread); + logInformationForThread( + "serviceWorkerParentThread information", + serviceWorkerParentThread + ); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread) + // When we load a page, Firefox will check the service worker freshness + // after a few seconds. So when the test lasts a long time (with some test + // environments) we might see spurious markers for that check, which we're + // not interested in for this part of the test. They're only present in the + // parent process. + .filter(marker => !marker.data.URI.includes(serviceWorkerFileName)); + + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + const serviceWorkerNetworkMarkers = getInflatedNetworkMarkers( + serviceWorkerParentThread + ); + + // Some more logs for debugging purposes. + info( + "Parent network markers: " + JSON.stringify(parentNetworkMarkers, null, 2) + ); + info( + "Content network markers: " + + JSON.stringify(contentNetworkMarkers, null, 2) + ); + info( + "Serviceworker network markers: " + + JSON.stringify(serviceWorkerNetworkMarkers, null, 2) + ); + + const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers); + const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers); + const serviceWorkerPairs = getPairsOfNetworkMarkers( + serviceWorkerNetworkMarkers + ); + + // First, make sure we properly matched all start with stop markers. This + // means that each of the three arrays should contain only arrays of 2 elements.
+ parentPairs.forEach(pair => + Assert.equal( + pair.length, + 2, + `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.` + ) + ); + + contentPairs.forEach(pair => + Assert.equal( + pair.length, + 2, + `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.` + ) + ); + serviceWorkerPairs.forEach(pair => + Assert.equal( + pair.length, + 2, + `For the URL ${pair[0].data.URI} we should get 2 markers in the service worker process.` + ) + ); + + // Let's look at all pairs and make sure we requested all expected files. + // In this test, we should have redirect markers as well as stop markers, + // because this case generates internal redirects. + // Let's create various arrays to help assert. + + let parentStopMarkers = parentPairs.map(([_, stopMarker]) => stopMarker); + const contentStopMarkers = contentPairs.map( + ([_, stopMarker]) => stopMarker + ); + // In this test we have very different results in the various threads, so + // we'll assert every case separately. + // A simple function to help construct better assertions: + const fullUrl = filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`; + + { + // In the parent process, we have 8 network markers: + // - twice the html file -- because it's not cached by the SW, we get the + // marker both for the initial request and for the request initiated from the + // SW. + // - twice the firefox svg file -- similar situation + // - once the generated svg file -- this one isn't fetched by the SW but + // rather forged directly, so there's no "second fetch", and thus we have + // only one marker. + // - for each of these files, we have first an internal redirect from the + // main channel to the service worker. => 3 more redirect markers. + Assert.equal( + parentStopMarkers.length, + 8, // 3 html files, 3 firefox svg files, 2 generated svg files + "There should be 8 stop markers in the parent process."
+ ); + + // The "1" requests are the initial requests that are intercepted, coming + // from the web page, while the "2" requests are requests to the network, + // coming from the service worker. The 1 were requested before 2, 2 ends + // before 1. + // "Intercept" requests are the internal redirects from the main channel + // to the service worker. They happen before others. + const [ + htmlFetchIntercept, + htmlFetch1, + htmlFetch2, + generatedSvgIntercept, + generatedSvgFetch, + firefoxSvgIntercept, + firefoxSvgFetch1, + firefoxSvgFetch2, + ] = parentStopMarkers; + + /* ----- /HTML FILE ---- */ + Assert.objectContains(htmlFetchIntercept, { + name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_REDIRECT", + URI: fullUrl("serviceworker_simple.html"), + requestMethod: "GET", + contentType: null, + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + redirectId: htmlFetch1.data.id, + redirectType: "Internal", + isHttpToHttpsRedirect: false, + RedirectURI: fullUrl("serviceworker_simple.html"), + cache: "Unresolved", + }), + }); + + Assert.objectContains(htmlFetch1, { + name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("serviceworker_simple.html"), + requestMethod: "GET", + contentType: "text/html", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + }), + }); + Assert.objectContains(htmlFetch2, { + name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("serviceworker_simple.html"), + requestMethod: "GET", + contentType: "text/html", + // Because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a 
result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: Expect.number(), + count: Expect.number(), + pri: Expect.number(), + }), + }); + /* ----- /HTML FILE ---- */ + + /* ----- GENERATED SVG FILE ---- */ + Assert.objectContains(generatedSvgIntercept, { + name: Expect.stringMatches(/Load \d+:.*firefox-generated.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_REDIRECT", + URI: fullUrl("firefox-generated.svg"), + requestMethod: "GET", + contentType: null, + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + redirectId: generatedSvgFetch.data.id, + redirectType: "Internal", + isHttpToHttpsRedirect: false, + RedirectURI: fullUrl("firefox-generated.svg"), + cache: "Unresolved", + innerWindowID: Expect.number(), + }), + }); + Assert.objectContains(generatedSvgFetch, { + name: Expect.stringMatches(/Load \d+:.*firefox-generated.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("firefox-generated.svg"), + requestMethod: "GET", + contentType: "image/svg+xml", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + innerWindowID: Expect.number(), + }), + }); + /* ----- ∕GENERATED SVG FILE ---- */ + /* ----- REQUESTED SVG FILE ---- */ + Assert.objectContains(firefoxSvgIntercept, { + name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_REDIRECT", + URI: 
fullUrl("firefox-logo-nightly.svg"), + requestMethod: "GET", + contentType: null, + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + redirectId: firefoxSvgFetch1.data.id, + redirectType: "Internal", + isHttpToHttpsRedirect: false, + RedirectURI: fullUrl("firefox-logo-nightly.svg"), + cache: "Unresolved", + innerWindowID: Expect.number(), + }), + }); + Assert.objectContains(firefoxSvgFetch1, { + name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("firefox-logo-nightly.svg"), + requestMethod: "GET", + contentType: "image/svg+xml", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + innerWindowID: Expect.number(), + }), + }); + Assert.objectContains(firefoxSvgFetch2, { + name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("firefox-logo-nightly.svg"), + requestMethod: "GET", + contentType: "image/svg+xml", + // Because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: Expect.number(), + count: Expect.number(), + pri: Expect.number(), + // Note: no innerWindowID here, is that a bug? 
+ }), + }); + /* ----- ∕REQUESTED SVG FILE ---- */ + } + + // It's possible that the service worker thread IS the content thread, in + // that case we'll get all markers in the same thread. + // The "1" requests are the initial requests that are intercepted, coming + // from the web page, while the "2" requests are the requests coming from + // the service worker. + let htmlFetch1, generatedSvgFetch1, firefoxSvgFetch1; + + // First, let's handle the case where the threads are different: + if (serviceWorkerParentThread !== contentThread) { + // In the content process (that is the process for the web page), we have + // 3 network markers: + // - 1 for the HTML page + // - 1 for the generated svg file + // - 1 for the firefox svg file + // Indeed, the service worker interception is invisible from the context + // of the web page, so we just get 3 "normal" requests. However these + // requests will miss all timing information, because they're hidden by + // the service worker interception. We may want to fix this... + Assert.equal( + contentStopMarkers.length, + 3, // 1 for each file + "There should be 3 stop markers in the content process." + ); + + [htmlFetch1, generatedSvgFetch1, firefoxSvgFetch1] = contentStopMarkers; + } else { + // Else case: the service worker parent thread IS the content thread + // (note: this is always the case with fission). In that case all network + // markers tested in the above block are together in the same object. + Assert.equal( + contentStopMarkers.length, + 5, + "There should be 5 stop markers in the combined process (containing both the content page and the service worker)" + ); + + // Because of how the test is done, these markers are ordered by the + // position of the START markers. + // NOTE(review): 5 stop markers are asserted above but only the first 3 are + // bound below, and no "2" variable is destructured even though the comment + // inside the pattern talks about one -- confirm that the first 3 entries + // really are the requests coming from the web page. + [ + // For the htmlFetch request, note that 2 is before 1, because that's + // the top level navigation.
Indeed for the top level navigation + // everything happens first in the main process, possibly before a + // content process even exists, and the content process is merely + // notified at the end. + htmlFetch1, + generatedSvgFetch1, + firefoxSvgFetch1, + ] = contentStopMarkers; + } + + // Let's test first the markers coming from the content page. + Assert.objectContains(htmlFetch1, { + name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("serviceworker_simple.html"), + requestMethod: "GET", + contentType: "text/html", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + }), + }); + Assert.objectContains(generatedSvgFetch1, { + name: Expect.stringMatches(/Load \d+:.*firefox-generated.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("firefox-generated.svg"), + requestMethod: "GET", + contentType: "image/svg+xml", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + innerWindowID: Expect.number(), + }), + }); + Assert.objectContains(firefoxSvgFetch1, { + name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_STOP", + URI: fullUrl("firefox-logo-nightly.svg"), + requestMethod: "GET", + contentType: "image/svg+xml", + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + pri: Expect.number(), + innerWindowID: Expect.number(), + }), + }); + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_marker_network_simple.js b/tools/profiler/tests/browser/browser_test_marker_network_simple.js new file mode 100644 index 0000000000..15894305a7 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_marker_network_simple.js @@ -0,0 +1,81 @@ +/* This Source Code Form is subject to the 
terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we emit network markers accordingly + */ +add_task(async function test_network_markers() { + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const url = BASE_URL + "simple.html?cacheBust=" + Math.random(); + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + const { parentThread, contentThread } = await stopProfilerNowAndGetThreads( + contentPid + ); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread); + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + info(JSON.stringify(parentNetworkMarkers, null, 2)); + info(JSON.stringify(contentNetworkMarkers, null, 2)); + + Assert.equal( + parentNetworkMarkers.length, + 2, + `We should get a pair of network markers in the parent thread.` + ); + Assert.equal( + contentNetworkMarkers.length, + 2, + `We should get a pair of network markers in the content thread.` + ); + + const parentStopMarker = parentNetworkMarkers[1]; + const contentStopMarker = contentNetworkMarkers[1]; + + const expectedProperties = { + name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`), + data: Expect.objectContains({ + status: "STATUS_STOP", + URI: url, + requestMethod: "GET", + contentType: "text/html", + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: Expect.number(), + count: Expect.number(), + pri: Expect.number(), + }), 
+ }; + + Assert.objectContains(parentStopMarker, expectedProperties); + // The cache information is missing from the content marker, it's only part + // of the parent marker. See Bug 1544821. + Assert.objectContains(parentStopMarker.data, { + // Because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. + cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + Assert.objectContains(contentStopMarker, expectedProperties); + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_marker_network_sts.js b/tools/profiler/tests/browser/browser_test_marker_network_sts.js new file mode 100644 index 0000000000..26f2a1c756 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_marker_network_sts.js @@ -0,0 +1,130 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we emit network markers accordingly. + * In this file we'll test that we behave properly with STS redirections. + */ + +/* NOTE(review): despite its name, this setup task only pushes the cache and + * HTTPS-only prefs; nothing in it refers to a service worker. The name looks + * like a leftover from the service worker tests -- confirm before renaming. */ +add_task(async function test_network_markers_service_worker_setup() { + await SpecialPowers.pushPrefEnv({ + set: [ + // Disabling cache makes the result more predictable especially in verify mode. + ["browser.cache.disk.enable", false], + ["browser.cache.memory.enable", false], + // We want to test upgrading requests + ["dom.security.https_only_mode", true], + ], + }); +}); + +add_task(async function test_network_markers_redirect_to_https() { + // In this test, we request an HTML page with http that gets redirected to https.
+ Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + startProfilerForMarkerTests(); + + const url = BASE_URL + "simple.html"; + const targetUrl = BASE_URL_HTTPS + "simple.html"; + + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + const { parentThread, contentThread } = await stopProfilerNowAndGetThreads( + contentPid + ); + + const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread); + const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread); + info(JSON.stringify(parentNetworkMarkers, null, 2)); + info(JSON.stringify(contentNetworkMarkers, null, 2)); + + Assert.equal( + parentNetworkMarkers.length, + 4, + `We should get 2 pairs of network markers in the parent thread.` + ); + + /* It looks like that for a redirection for the top level navigation, the + * content thread sees the markers for the second request only. + * See Bug 1692879. */ + Assert.equal( + contentNetworkMarkers.length, + 2, + `We should get one pair of network markers in the content thread.` + ); + + const parentRedirectMarker = parentNetworkMarkers[1]; + const parentStopMarker = parentNetworkMarkers[3]; + // There's no content redirect marker for the reason outlined above. 
+ const contentStopMarker = contentNetworkMarkers[1]; + + Assert.objectContains(parentRedirectMarker, { + name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`), + data: Expect.objectContainsOnly({ + type: "Network", + status: "STATUS_REDIRECT", + URI: url, + RedirectURI: targetUrl, + requestMethod: "GET", + contentType: null, + startTime: Expect.number(), + endTime: Expect.number(), + id: Expect.number(), + redirectId: parentStopMarker.data.id, + pri: Expect.number(), + cache: "Unresolved", + redirectType: "Permanent", + isHttpToHttpsRedirect: true, + }), + }); + + const expectedProperties = { + name: Expect.stringMatches( + `Load \\d+:.*${escapeStringRegexp(targetUrl)}` + ), + }; + const expectedDataProperties = { + type: "Network", + status: "STATUS_STOP", + URI: targetUrl, + requestMethod: "GET", + contentType: "text/html", + startTime: Expect.number(), + endTime: Expect.number(), + domainLookupStart: Expect.number(), + domainLookupEnd: Expect.number(), + connectStart: Expect.number(), + tcpConnectEnd: Expect.number(), + connectEnd: Expect.number(), + requestStart: Expect.number(), + responseStart: Expect.number(), + responseEnd: Expect.number(), + id: Expect.number(), + count: Expect.number(), + pri: Expect.number(), + }; + + Assert.objectContains(parentStopMarker, expectedProperties); + Assert.objectContains(contentStopMarker, expectedProperties); + + // The cache information is missing from the content marker, it's only part + // of the parent marker. See Bug 1544821. + Assert.objectContainsOnly(parentStopMarker.data, { + ...expectedDataProperties, + // Because the request races with the cache, these 2 values are valid: + // "Missed" when the cache answered before we get a result from the network. + // "Unresolved" when we got a response from the network before the cache subsystem. 
+ cache: Expect.stringMatches(/^(Missed|Unresolved)$/), + }); + Assert.objectContainsOnly(contentStopMarker.data, expectedDataProperties); + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_markers_gc_cc.js b/tools/profiler/tests/browser/browser_test_markers_gc_cc.js new file mode 100644 index 0000000000..a4a94d60cc --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_markers_gc_cc.js @@ -0,0 +1,49 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that GCSlice and CCSlice markers are recorded. + * A throwaway profile is captured and then dropped so that there is something + * to collect, then GC and CC are forced twice while the profiler is running. + * The markers are looked up in profile.threads[0]. + */ +add_task(async function test_markers_gc_cc() { + info("Test GC&CC markers."); + + info("Create a throwaway profile."); + await startProfiler({}); + let tempProfileContainer = { profile: null }; + tempProfileContainer.profile = await waitSamplingAndStopAndGetProfile(); + + info("Restart the profiler."); + await startProfiler({}); + + info("Throw away the previous profile, which should be garbage-collected."); + Assert.equal( + typeof tempProfileContainer.profile, + "object", + "Previously-captured profile should be an object" + ); + delete tempProfileContainer.profile; + Assert.equal( + typeof tempProfileContainer.profile, + "undefined", + "Deleted profile should now be undefined" + ); + + info("Force GC&CC"); + SpecialPowers.gc(); + SpecialPowers.forceShrinkingGC(); + SpecialPowers.forceCC(); + SpecialPowers.gc(); + SpecialPowers.forceShrinkingGC(); + SpecialPowers.forceCC(); + + info("Stop the profiler and get the profile."); + const profile = await waitSamplingAndStopAndGetProfile(); + + const markers = getInflatedMarkerData(profile.threads[0]); + Assert.ok( + markers.some(({ data }) => data?.type === "GCSlice"), + "A GCSlice marker was recorded" + ); + Assert.ok( + markers.some(({ data }) => data?.type === "CCSlice"), + "A CCSlice marker was recorded" + ); +}); diff --git
a/tools/profiler/tests/browser/browser_test_markers_parent_process.js b/tools/profiler/tests/browser/browser_test_markers_parent_process.js new file mode 100644 index 0000000000..28b82f8054 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_markers_parent_process.js @@ -0,0 +1,37 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that dispatching a "synthetic" DOM event on the window, while the + * profiler runs with the "nostacksampling" feature, records both an + * INTERVAL_START and an INTERVAL_END marker for it in profile.threads[0], and + * that the end marker is a "DOMEvent" with a positive latency. + */ +add_task(async function test_markers_parent_process() { + info("Test markers that are generated by the browser's parent process."); + + info("Start the profiler in nostacksampling mode."); + await startProfiler({ features: ["nostacksampling"] }); + + info("Dispatch a DOMEvent"); + window.dispatchEvent(new Event("synthetic")); + + info("Stop the profiler and get the profile."); + const profile = await stopNowAndGetProfile(); + + const markers = getInflatedMarkerData(profile.threads[0]); + { + const domEventStart = markers.find( + ({ phase, data }) => + phase === INTERVAL_START && data?.eventType === "synthetic" + ); + const domEventEnd = markers.find( + ({ phase, data }) => + phase === INTERVAL_END && data?.eventType === "synthetic" + ); + ok(domEventStart, "A start DOMEvent was generated"); + ok(domEventEnd, "An end DOMEvent was generated"); + ok( + domEventEnd.data.latency > 0, + "DOMEvent had a a latency value generated." + ); + ok(domEventEnd.data.type === "DOMEvent"); + ok(domEventEnd.name === "DOMEvent"); + } + // Add more marker tests. +}); diff --git a/tools/profiler/tests/browser/browser_test_markers_preferencereads.js b/tools/profiler/tests/browser/browser_test_markers_preferencereads.js new file mode 100644 index 0000000000..0ae183f874 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_markers_preferencereads.js @@ -0,0 +1,73 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +requestLongerTimeout(10); + +const kContentPref = "font.size.variable.x-western"; + +/** + * Count the "Preference" payloads found in `thread` whose prefName is exactly + * `pref`. + */ +function countPrefReadsInThread(pref, thread) { + let count = 0; + for (let payload of getPayloadsOfType(thread, "Preference")) { + if (payload.prefName === pref) { + count++; + } + } + return count; +} + +/** + * Run in the selected browser's content: resolve once the document's + * readyState is "complete" and a requestAnimationFrame callback followed by a + * zero-delay timeout have fired. If the document is not complete yet, this + * waits on "readystatechange" events. + */ +async function waitForPaintAfterLoad() { + return SpecialPowers.spawn(gBrowser.selectedBrowser, [], () => { + return new Promise(function (resolve) { + function listener() { + if (content.document.readyState == "complete") { + content.requestAnimationFrame(() => content.setTimeout(resolve, 0)); + } + } + if (content.document.readyState != "complete") { + content.document.addEventListener("readystatechange", listener); + } else { + listener(); + } + }); + }); +} + +/** + * Test the Preference Read markers. + */ +add_task(async function test_profile_preferencereads_markers() { + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + await startProfiler({ features: ["js"] }); + + const url = BASE_URL + "single_frame.html"; + await BrowserTestUtils.withNewTab(url, async contentBrowser => { + const contentPid = await SpecialPowers.spawn( + contentBrowser, + [], + () => Services.appinfo.processID + ); + + await waitForPaintAfterLoad(); + + // Ensure we read a pref in the content process. + await SpecialPowers.spawn(contentBrowser, [kContentPref], pref => { + Services.prefs.getIntPref(pref); + }); + + // Check that some Preference Read profile markers were generated.
+ { + const { contentThread } = await stopProfilerNowAndGetThreads(contentPid); + + Assert.greater( + countPrefReadsInThread(kContentPref, contentThread), + 0, + `Preference Read profile markers for ${kContentPref} were recorded.` + ); + } + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js b/tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js new file mode 100644 index 0000000000..14d76dbcaf --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js @@ -0,0 +1,199 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Tell whether the given process info lists a thread named "SamplerThread". + */ +function ProcessHasSamplerThread(process) { + return process.threads.some(t => t.name == "SamplerThread"); +} + +/** + * Return the pids of the processes reported by ChromeUtils.requestProcInfo() + * that have a SamplerThread: the parent pid first (when the parent has one), + * followed by the matching children. + */ +async function GetPidsWithSamplerThread() { + let parentProc = await ChromeUtils.requestProcInfo(); + + let pids = parentProc.children + .filter(ProcessHasSamplerThread) + .map(proc => proc.pid); + if (ProcessHasSamplerThread(parentProc)) { + pids.unshift(parentProc.pid); + } + return pids; +} + +// fnFilterWithContentId: Called with content child pid, returns filters to use. +// E.g.: 123 => ["GeckoMain", "pid:123"], or 123 => ["pid:456"].
+async function test_with_filter(fnFilterWithContentId) { + /* Callers destructure { contentPid, pidsWithSamplerThread, profile } from the + * awaited result: the new tab's content pid, the pids that had a SamplerThread + * once that list stopped changing, and the captured profile. */ + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still some open tabs."); + await Services.profiler.ClearAllPages(); + + info("Open a tab with single_frame.html in it."); + const url = BASE_URL + "single_frame.html"; + return BrowserTestUtils.withNewTab(url, async function (contentBrowser) { + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + Assert.deepEqual( + await GetPidsWithSamplerThread(), + [], + "There should be no SamplerThreads before starting the profiler" + ); + + info("Start the profiler to test filters including 'pid:'."); + await startProfiler({ threads: fnFilterWithContentId(contentPid) }); + + let pidsWithSamplerThread = null; + await TestUtils.waitForCondition( + async function () { + /* The condition holds once two consecutive polls return the same pid + * list; the first poll always fails because it is compared to null. */ + let pidsStringBefore = JSON.stringify(pidsWithSamplerThread); + pidsWithSamplerThread = await GetPidsWithSamplerThread(); + return JSON.stringify(pidsWithSamplerThread) == pidsStringBefore; + }, + "Wait for sampler threads to stabilize after profiler start", + /* interval (ms) */ 250, + /* maxTries */ 10 + ); + + info("Capture the profile data."); + const profile = await waitSamplingAndStopAndGetProfile(); + + await TestUtils.waitForCondition(async function () { + return !(await GetPidsWithSamplerThread()).length; + }, "Wait for all sampler threads to stop after profiler stop"); + + return { contentPid, pidsWithSamplerThread, profile }; + }); +} + +add_task(async function browser_test_profile_capture_along_with_content_pid() { + const { contentPid, pidsWithSamplerThread, profile } = await test_with_filter( + contentPid => ["GeckoMain", "pid:" + contentPid] + ); + + Assert.greater( + pidsWithSamplerThread.length, + 2, + "There should be lots of SamplerThreads after starting the profiler" + ); + + let contentProcessIndex = profile.processes.findIndex( + p => p.threads[0].pid == contentPid + ); +
Assert.notEqual( + contentProcessIndex, + -1, + "The content process should be present" + ); + + // Note: Some threads may not be registered, so we can't expect that many. But + // 10 is much more than the default 4. + Assert.greater( + profile.processes[contentProcessIndex].threads.length, + 10, + "The content process should have many threads" + ); + + Assert.equal( + profile.threads.length, + 1, + "The parent process should have only one thread" + ); + Assert.equal( + profile.threads[0].name, + "GeckoMain", + "The parent process should have the main thread" + ); +}); + +add_task(async function browser_test_profile_capture_along_with_other_pid() { + const parentPid = Services.appinfo.processID; + const { contentPid, pidsWithSamplerThread, profile } = await test_with_filter( + contentPid => ["GeckoMain", "pid:" + parentPid] + ); + + Assert.greater( + pidsWithSamplerThread.length, + 2, + "There should be lots of SamplerThreads after starting the profiler" + ); + + let contentProcessIndex = profile.processes.findIndex( + p => p.threads[0].pid == contentPid + ); + Assert.notEqual( + contentProcessIndex, + -1, + "The content process should be present" + ); + + Assert.equal( + profile.processes[contentProcessIndex].threads.length, + 1, + "The content process should have only one thread" + ); + + // Note: Some threads may not be registered, so we can't expect that many. But + // 10 is much more than the default 4. + Assert.greater( + profile.threads.length, + 10, + "The parent process should have many threads" + ); +}); + +add_task(async function browser_test_profile_capture_by_only_content_pid() { + const parentPid = Services.appinfo.processID; + const { contentPid, pidsWithSamplerThread, profile } = await test_with_filter( + contentPid => ["pid:" + contentPid] + ); + + // The sampler thread always runs in the parent process, see bug 1754100. 
+ Assert.deepEqual( + pidsWithSamplerThread, + [parentPid, contentPid], + "There should only be SamplerThreads in the parent and the target child" + ); + + Assert.equal( + profile.processes.length, + 1, + "There should only be one child process" + ); + // Note: Some threads may not be registered, so we can't expect that many. But + // 10 is much more than the default 4. + Assert.greater( + profile.processes[0].threads.length, + 10, + "The child process should have many threads" + ); + Assert.equal( + profile.processes[0].threads[0].pid, + contentPid, + "The only child process should be our content" + ); +}); + +add_task(async function browser_test_profile_capture_by_only_parent_pid() { + const parentPid = Services.appinfo.processID; + const { pidsWithSamplerThread, profile } = await test_with_filter( + contentPid => ["pid:" + parentPid] + ); + + Assert.deepEqual( + pidsWithSamplerThread, + [parentPid], + "There should only be a SamplerThread in the parent" + ); + + // Note: Some threads may not be registered, so we can't expect that many. But + // 10 is much more than the default 4. + Assert.greater( + profile.threads.length, + 10, + "The parent process should have many threads" + ); + Assert.equal( + profile.processes.length, + 0, + "There should be no child processes" + ); +}); diff --git a/tools/profiler/tests/browser/browser_test_profile_fission.js b/tools/profiler/tests/browser/browser_test_profile_fission.js new file mode 100644 index 0000000000..775fc8048e --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_profile_fission.js @@ -0,0 +1,191 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +if (SpecialPowers.useRemoteSubframes) { + // Bug 1586105: these tests could time out in some extremely slow conditions, + // when fission is enabled. 
+ // Requesting a longer timeout should make it pass. + requestLongerTimeout(2); +} + +add_task(async function test_profile_fission_no_private_browsing() { + // Requesting the complete log to be able to debug Bug 1586105. + SimpleTest.requestCompleteLog(); + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still have some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with single frame page." + ); + await startProfiler(); + + info("Open a private window with single_frame.html in it."); + const win = await BrowserTestUtils.openNewBrowserWindow({ + fission: true, + }); + + try { + const url = BASE_URL_HTTPS + "single_frame.html"; + const contentBrowser = win.gBrowser.selectedBrowser; + BrowserTestUtils.loadURIString(contentBrowser, url); + await BrowserTestUtils.browserLoaded(contentBrowser, false, url); + + const parentPid = Services.appinfo.processID; + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + // Getting the active Browser ID to assert the page info tabID later. + const activeTabID = contentBrowser.browsingContext.browserId; + + info("Capture the profile data."); + const { profile, contentProcess, contentThread } = + await stopProfilerNowAndGetThreads(contentPid); + + Assert.equal( + contentThread.isPrivateBrowsing, + false, + "The content process has the private browsing flag set to false." + ); + + Assert.equal( + contentThread.userContextId, + 0, + "The content process has the information about the container used for this process" + ); + + info( + "Check if the captured page is the one with correct values we created." 
+ ); + + let pageFound = false; + for (const page of contentProcess.pages) { + if (page.url == url) { + Assert.equal(page.url, url); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + // Top level document will have no embedder. + Assert.equal(page.embedderInnerWindowID, 0); + Assert.equal(typeof page.isPrivateBrowsing, "boolean"); + Assert.equal(page.isPrivateBrowsing, false); + pageFound = true; + break; + } + } + Assert.equal(pageFound, true); + + info("Check that the profiling logs exist with the expected properties."); + Assert.equal(typeof profile.profilingLog, "object"); + Assert.equal(typeof profile.profilingLog[parentPid], "object"); + const parentLog = profile.profilingLog[parentPid]; + Assert.equal(typeof parentLog.profilingLogBegin_TSms, "number"); + Assert.equal(typeof parentLog.profilingLogEnd_TSms, "number"); + Assert.equal(typeof parentLog.bufferGlobalController, "object"); + Assert.equal( + typeof parentLog.bufferGlobalController.controllerCreationTime_TSms, + "number" + ); + + Assert.equal(typeof profile.profileGatheringLog, "object"); + Assert.equal(typeof profile.profileGatheringLog[parentPid], "object"); + Assert.equal( + typeof profile.profileGatheringLog[parentPid] + .profileGatheringLogBegin_TSms, + "number" + ); + Assert.equal( + typeof profile.profileGatheringLog[parentPid].profileGatheringLogEnd_TSms, + "number" + ); + + Assert.equal(typeof contentProcess.profilingLog, "object"); + Assert.equal(typeof contentProcess.profilingLog[contentPid], "object"); + Assert.equal( + typeof contentProcess.profilingLog[contentPid].profilingLogBegin_TSms, + "number" + ); + Assert.equal( + typeof contentProcess.profilingLog[contentPid].profilingLogEnd_TSms, + "number" + ); + + Assert.equal(typeof contentProcess.profileGatheringLog, "undefined"); + } finally { + await BrowserTestUtils.closeWindow(win); + } +}); + +add_task(async function 
test_profile_fission_private_browsing() { + // Requesting the complete log to be able to debug Bug 1586105. + SimpleTest.requestCompleteLog(); + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still have some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with single frame page." + ); + await startProfiler(); + + info("Open a private window with single_frame.html in it."); + const win = await BrowserTestUtils.openNewBrowserWindow({ + private: true, + fission: true, + }); + + try { + const url = BASE_URL_HTTPS + "single_frame.html"; + const contentBrowser = win.gBrowser.selectedBrowser; + BrowserTestUtils.loadURIString(contentBrowser, url); + await BrowserTestUtils.browserLoaded(contentBrowser, false, url); + + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + // Getting the active Browser ID to assert the page info tabID later. + const activeTabID = contentBrowser.browsingContext.browserId; + + info("Capture the profile data."); + const { contentProcess, contentThread } = + await stopProfilerNowAndGetThreads(contentPid); + + Assert.equal( + contentThread.isPrivateBrowsing, + true, + "The content process has the private browsing flag set to true." + ); + + Assert.equal( + contentThread.userContextId, + 0, + "The content process has the information about the container used for this process" + ); + + info( + "Check if the captured page is the one with correct values we created." + ); + + let pageFound = false; + for (const page of contentProcess.pages) { + if (page.url == url) { + Assert.equal(page.url, url); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + // Top level document will have no embedder. 
+ Assert.equal(page.embedderInnerWindowID, 0); + Assert.equal(typeof page.isPrivateBrowsing, "boolean"); + Assert.equal(page.isPrivateBrowsing, true); + pageFound = true; + break; + } + } + Assert.equal(pageFound, true); + } finally { + await BrowserTestUtils.closeWindow(win); + } +}); diff --git a/tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js b/tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js new file mode 100644 index 0000000000..854587678d --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js @@ -0,0 +1,83 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +if (SpecialPowers.useRemoteSubframes) { + // Bug 1586105: these tests could time out in some extremely slow conditions, + // when fission is enabled. + // Requesting a longer timeout should make it pass. + requestLongerTimeout(2); +} + +add_task(async function test_profile_multi_frame_page_info() { + // Requesting the complete log to be able to debug Bug 1586105. + SimpleTest.requestCompleteLog(); + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still have some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with multi frame page." + ); + await startProfiler(); + + info("Open a tab with multi_frame.html in it."); + // multi_frame.html embeds single_frame.html inside an iframe. + const url = BASE_URL + "multi_frame.html"; + await BrowserTestUtils.withNewTab(url, async function (contentBrowser) { + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + // Getting the active Browser ID to assert the page info tabID later. 
+ const win = Services.wm.getMostRecentWindow("navigator:browser"); + const activeTabID = win.gBrowser.selectedBrowser.browsingContext.browserId; + + info("Capture the profile data."); + const { contentProcess } = await stopProfilerNowAndGetThreads(contentPid); + + info( + "Check if the captured pages are the ones with correct values we created." + ); + + let parentPage; + let foundPage = 0; + for (const page of contentProcess.pages) { + // Parent page + if (page.url == url) { + Assert.equal(page.url, url); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + // Top level document will have no embedder. + Assert.equal(page.embedderInnerWindowID, 0); + Assert.equal(typeof page.isPrivateBrowsing, "boolean"); + Assert.equal(page.isPrivateBrowsing, false); + parentPage = page; + foundPage++; + break; + } + } + + Assert.notEqual(typeof parentPage, "undefined"); + + for (const page of contentProcess.pages) { + // Child page (iframe) + if (page.url == BASE_URL + "single_frame.html") { + Assert.equal(page.url, BASE_URL + "single_frame.html"); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + Assert.equal(typeof page.embedderInnerWindowID, "number"); + Assert.notEqual(typeof parentPage, "undefined"); + Assert.equal(page.embedderInnerWindowID, parentPage.innerWindowID); + Assert.equal(typeof page.isPrivateBrowsing, "boolean"); + Assert.equal(page.isPrivateBrowsing, false); + foundPage++; + break; + } + } + + Assert.equal(foundPage, 2); + }); +}); diff --git a/tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js b/tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js new file mode 100644 index 0000000000..240213be56 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js @@ -0,0 +1,132 @@ +/* This Source Code Form is 
subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +if (SpecialPowers.useRemoteSubframes) { + // Bug 1586105: these tests could time out in some extremely slow conditions, + // when fission is enabled. + // Requesting a longer timeout should make it pass. + requestLongerTimeout(2); +} + +add_task(async function test_profile_single_frame_page_info() { + // Requesting the complete log to be able to debug Bug 1586105. + SimpleTest.requestCompleteLog(); + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still have some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with single frame page." + ); + await startProfiler(); + + info("Open a tab with single_frame.html in it."); + const url = BASE_URL + "single_frame.html"; + await BrowserTestUtils.withNewTab(url, async function (contentBrowser) { + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + // Getting the active Browser ID to assert the page info tabID later. + const win = Services.wm.getMostRecentWindow("navigator:browser"); + const activeTabID = win.gBrowser.selectedBrowser.browsingContext.browserId; + + info("Capture the profile data."); + const { contentProcess } = await stopProfilerNowAndGetThreads(contentPid); + + info( + "Check if the captured page is the one with correct values we created." + ); + + let pageFound = false; + for (const page of contentProcess.pages) { + if (page.url == url) { + Assert.equal(page.url, url); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + // Top level document will have no embedder. 
+ Assert.equal(page.embedderInnerWindowID, 0); + Assert.equal(typeof page.isPrivateBrowsing, "boolean"); + Assert.equal(page.isPrivateBrowsing, false); + pageFound = true; + break; + } + } + Assert.equal(pageFound, true); + }); +}); + +add_task(async function test_profile_private_browsing() { + // Requesting the complete log to be able to debug Bug 1586105. + SimpleTest.requestCompleteLog(); + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still have some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with single frame page." + ); + await startProfiler(); + + info("Open a private window with single_frame.html in it."); + const win = await BrowserTestUtils.openNewBrowserWindow({ + fission: false, + private: true, + }); + + try { + const url = BASE_URL_HTTPS + "single_frame.html"; + const contentBrowser = win.gBrowser.selectedBrowser; + BrowserTestUtils.loadURIString(contentBrowser, url); + await BrowserTestUtils.browserLoaded(contentBrowser, false, url); + + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + // Getting the active Browser ID to assert the page info tabID later. + const activeTabID = contentBrowser.browsingContext.browserId; + + info("Capture the profile data."); + const { contentProcess, contentThread } = + await stopProfilerNowAndGetThreads(contentPid); + + // This information is available with fission only. + Assert.equal( + contentThread.isPrivateBrowsing, + undefined, + "The content process has no private browsing flag." + ); + + Assert.equal( + contentThread.userContextId, + undefined, + "The content process has no information about the container used for this process." + ); + + info( + "Check if the captured page is the one with correct values we created." 
+ ); + + let pageFound = false; + for (const page of contentProcess.pages) { + if (page.url == url) { + Assert.equal(page.url, url); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + // Top level document will have no embedder. + Assert.equal(page.embedderInnerWindowID, 0); + Assert.equal(typeof page.isPrivateBrowsing, "boolean"); + Assert.equal(page.isPrivateBrowsing, true); + pageFound = true; + break; + } + } + Assert.equal(pageFound, true); + } finally { + await BrowserTestUtils.closeWindow(win); + } +}); diff --git a/tools/profiler/tests/browser/browser_test_profile_slow_capture.js b/tools/profiler/tests/browser/browser_test_profile_slow_capture.js new file mode 100644 index 0000000000..4a675b84d1 --- /dev/null +++ b/tools/profiler/tests/browser/browser_test_profile_slow_capture.js @@ -0,0 +1,104 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +add_task(async function browser_test_profile_slow_capture() { + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with single frame page." + ); + await startProfiler({ threads: ["GeckoMain", "test-debug-child-slow-json"] }); + + info("Open a tab with single_frame.html in it."); + const url = BASE_URL + "single_frame.html"; + await BrowserTestUtils.withNewTab(url, async function (contentBrowser) { + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + // Getting the active Browser ID to assert the page info tabID later. 
+ const win = Services.wm.getMostRecentWindow("navigator:browser"); + const activeTabID = win.gBrowser.selectedBrowser.browsingContext.browserId; + + info("Capture the profile data."); + const profile = await waitSamplingAndStopAndGetProfile(); + + let pageFound = false; + // We need to find the correct content process for that tab. + let contentProcess = profile.processes.find( + p => p.threads[0].pid == contentPid + ); + + if (!contentProcess) { + throw new Error( + `Could not find the content process with given pid: ${contentPid}` + ); + } + + info( + "Check if the captured page is the one with correct values we created." + ); + + for (const page of contentProcess.pages) { + if (page.url == url) { + Assert.equal(page.url, url); + Assert.equal(typeof page.tabID, "number"); + Assert.equal(page.tabID, activeTabID); + Assert.equal(typeof page.innerWindowID, "number"); + // Top level document will have no embedder. + Assert.equal(page.embedderInnerWindowID, 0); + pageFound = true; + break; + } + } + Assert.equal(pageFound, true); + + info("Flush slow processes with a quick profile."); + await startProfiler(); + for (let i = 0; i < 10; ++i) { + await Services.profiler.waitOnePeriodicSampling(); + } + await stopNowAndGetProfile(); + }); +}); + +add_task(async function browser_test_profile_very_slow_capture() { + Assert.ok(!Services.profiler.IsActive()); + info("Clear the previous pages just in case we still some open tabs."); + await Services.profiler.ClearAllPages(); + + info( + "Start the profiler to test the page information with single frame page." 
+ ); + await startProfiler({ + threads: ["GeckoMain", "test-debug-child-very-slow-json"], + }); + + info("Open a tab with single_frame.html in it."); + const url = BASE_URL + "single_frame.html"; + await BrowserTestUtils.withNewTab(url, async function (contentBrowser) { + const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => { + return Services.appinfo.processID; + }); + + info("Capture the profile data."); + const profile = await waitSamplingAndStopAndGetProfile(); + + info("Check that the content process is missing."); + + let contentProcessIndex = profile.processes.findIndex( + p => p.threads[0].pid == contentPid + ); + Assert.equal(contentProcessIndex, -1); + + info("Flush slow processes with a quick profile."); + await startProfiler(); + for (let i = 0; i < 10; ++i) { + await Services.profiler.waitOnePeriodicSampling(); + } + await stopNowAndGetProfile(); + }); +}); diff --git a/tools/profiler/tests/browser/do_work_500ms.html b/tools/profiler/tests/browser/do_work_500ms.html new file mode 100644 index 0000000000..9713a80671 --- /dev/null +++ b/tools/profiler/tests/browser/do_work_500ms.html @@ -0,0 +1,41 @@ + + + + + Do some work for 500ms + + + + Do some work for 500ms. 
+ + diff --git a/tools/profiler/tests/browser/firefox-logo-nightly.svg b/tools/profiler/tests/browser/firefox-logo-nightly.svg new file mode 100644 index 0000000000..f1af370d87 --- /dev/null +++ b/tools/profiler/tests/browser/firefox-logo-nightly.svg @@ -0,0 +1 @@ +firefox-logo-nightly \ No newline at end of file diff --git a/tools/profiler/tests/browser/head.js b/tools/profiler/tests/browser/head.js new file mode 100644 index 0000000000..ef0e3128c0 --- /dev/null +++ b/tools/profiler/tests/browser/head.js @@ -0,0 +1,159 @@ +/* import-globals-from ../shared-head.js */ + +Services.scriptloader.loadSubScript( + "chrome://mochitests/content/browser/tools/profiler/tests/browser/shared-head.js", + this +); + +const BASE_URL = "http://example.com/browser/tools/profiler/tests/browser/"; +const BASE_URL_HTTPS = + "https://example.com/browser/tools/profiler/tests/browser/"; + +registerCleanupFunction(async () => { + if (Services.profiler.IsActive()) { + info( + "The profiler was found to still be running at the end of the test, which means that some error likely occured. Let's stop it to prevent issues with following tests!" + ); + await Services.profiler.StopProfiler(); + } +}); + +/** + * This is a helper function that will stop the profiler and returns the main + * threads for the parent process and the content process with PID contentPid. + * This happens immediately, without waiting for any sampling to happen or + * finish. Use waitSamplingAndStopProfilerAndGetThreads below instead to wait + * for samples before stopping. + * This returns also the full profile in case the caller wants more information. 
+ * + * @param {number} contentPid + * @returns {Promise<{profile, parentThread, contentProcess, contentThread}>} + */ +async function stopProfilerNowAndGetThreads(contentPid) { + const profile = await stopNowAndGetProfile(); + + const parentThread = profile.threads[0]; + const contentProcess = profile.processes.find( + p => p.threads[0].pid == contentPid + ); + if (!contentProcess) { + throw new Error( + `Could not find the content process with given pid: ${contentPid}` + ); + } + + if (!parentThread) { + throw new Error("The parent thread was not found in the profile."); + } + + const contentThread = contentProcess.threads[0]; + if (!contentThread) { + throw new Error("The content thread was not found in the profile."); + } + + return { profile, parentThread, contentProcess, contentThread }; +} + +/** + * This is a helper function that will stop the profiler and returns the main + * threads for the parent process and the content process with PID contentPid. + * As opposed to stopProfilerNowAndGetThreads (with "Now") above, the profiler + * in that PID will not stop until there is at least one periodic sample taken. + * + * @param {number} contentPid + * @returns {Promise<{profile, parentThread, contentProcess, contentThread}>} + */ +async function waitSamplingAndStopProfilerAndGetThreads(contentPid) { + await Services.profiler.waitOnePeriodicSampling(); + + return stopProfilerNowAndGetThreads(contentPid); +} + +/** This tries to find the service worker thread by targeting a very specific + * UserTiming marker. Indeed we use performance.mark to add this marker from the + * service worker's events. + * Then from this thread we get its parent thread. Indeed the parent thread is + * where all network stuff happens, so this is useful for network marker tests.
+ * + * @param {Object} profile + * @returns {{ serviceWorkerThread: Object, serviceWorkerParentThread: Object } | null} the found threads, or null when no service worker thread is found + */ +function findServiceWorkerThreads(profile) { + const allThreads = [ + profile.threads, + ...profile.processes.map(process => process.threads), + ].flat(); + + const serviceWorkerThread = allThreads.find( + ({ processType, markers }) => + processType === "tab" && + markers.data.some(markerTuple => { + const data = markerTuple[markers.schema.data]; + return ( + data && + data.type === "UserTiming" && + data.name === "__serviceworker_event" + ); + }) + ); + + if (!serviceWorkerThread) { + info( + "We couldn't find a service worker thread. Here are all the threads in this profile:" + ); + allThreads.forEach(logInformationForThread.bind(null, "")); + return null; + } + + const serviceWorkerParentThread = allThreads.find( + ({ name, pid }) => pid === serviceWorkerThread.pid && name === "GeckoMain" + ); + + if (!serviceWorkerParentThread) { + info( + `We couldn't find a parent thread for the service worker thread (pid: ${serviceWorkerThread.pid}, tid: ${serviceWorkerThread.tid}).` + ); + info("Here are all the threads in this profile:"); + allThreads.forEach(logInformationForThread.bind(null, "")); + + // Let's write the profile on disk if MOZ_UPLOAD_DIR is present + const path = Services.env.get("MOZ_UPLOAD_DIR"); + if (path) { + const profileName = `profile_${Date.now()}.json`; + const profilePath = PathUtils.join(path, profileName); + info( + `We wrote down the profile on disk as an artifact, with name ${profileName}.` + ); + // This function returns a Promise, but we're not waiting on it because + // we're in a synchronous function. Hopefully writing will be finished + // when the process ends. + IOUtils.writeJSON(profilePath, profile).catch(err => + console.error("An error happened when writing the profile on disk", err) + ); + } + throw new Error( + "We couldn't find a parent thread for the service worker thread.
Please read logs to find more information." + ); + } + + return { serviceWorkerThread, serviceWorkerParentThread }; +} + +/** + * This logs some basic information about the passed thread. + * + * @param {string} prefix + * @param {Object} thread + */ +function logInformationForThread(prefix, thread) { + if (!thread) { + info(prefix + ": thread is null or undefined."); + return; + } + + const { name, pid, tid, processName, processType } = thread; + info( + `${prefix}: ` + + `name(${name}) pid(${pid}) tid(${tid}) processName(${processName}) processType(${processType})` + ); +} diff --git a/tools/profiler/tests/browser/multi_frame.html b/tools/profiler/tests/browser/multi_frame.html new file mode 100644 index 0000000000..b2efcedd50 --- /dev/null +++ b/tools/profiler/tests/browser/multi_frame.html @@ -0,0 +1,11 @@ + + + + + Multi Frame + + + Multi Frame + + + diff --git a/tools/profiler/tests/browser/page_with_resources.html b/tools/profiler/tests/browser/page_with_resources.html new file mode 100644 index 0000000000..9d2bb8f218 --- /dev/null +++ b/tools/profiler/tests/browser/page_with_resources.html @@ -0,0 +1,11 @@ + + + + + + + Testing + + + + diff --git a/tools/profiler/tests/browser/redirect.sjs b/tools/profiler/tests/browser/redirect.sjs new file mode 100644 index 0000000000..2a325c3d0b --- /dev/null +++ b/tools/profiler/tests/browser/redirect.sjs @@ -0,0 +1,8 @@ +function handleRequest(request, response) { + response.setStatusLine(request.httpVersion, 301, "Moved Permanently"); + response.setHeader( + "Location", + decodeURIComponent(request.queryString), + false + ); +} diff --git a/tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg b/tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg new file mode 100644 index 0000000000..f1af370d87 --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg @@ -0,0 +1 @@ +firefox-logo-nightly \ No newline at end of file diff --git 
a/tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js b/tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js new file mode 100644 index 0000000000..16a9f0c91f --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js @@ -0,0 +1,39 @@ +// Most of this file has been stolen from dom/serviceworkers/test/utils.js. + +function waitForState(worker, state) { + return new Promise((resolve, reject) => { + function onStateChange() { + if (worker.state === state) { + worker.removeEventListener("statechange", onStateChange); + resolve(); + } + if (worker.state === "redundant") { + worker.removeEventListener("statechange", onStateChange); + reject(new Error("The service worker failed to install.")); + } + } + + // First add an event listener, so we won't miss any change that happens + // before we check the current state. + worker.addEventListener("statechange", onStateChange); + + // Now check if the worker is already in the desired state. + onStateChange(); + }); +} + +async function registerServiceWorkerAndWait(serviceWorkerFile) { + if (!serviceWorkerFile) { + throw new Error( + "No service worker filename has been specified. Please specify a valid filename." 
+ ); + } + + console.log(`...registering the serviceworker "${serviceWorkerFile}"`); + const reg = await navigator.serviceWorker.register(`./${serviceWorkerFile}`, { + scope: "./", + }); + console.log("...waiting for activation"); + await waitForState(reg.installing, "activated"); + console.log("...activated!"); +} diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js new file mode 100644 index 0000000000..baa07fd6d8 --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js @@ -0,0 +1,34 @@ +const files = ["serviceworker_page.html", "firefox-logo-nightly.svg"]; +const cacheName = "v1"; + +self.addEventListener("install", event => { + performance.mark("__serviceworker_event"); + console.log("[SW]:", "Install event"); + + event.waitUntil(cacheAssets()); +}); + +async function cacheAssets() { + const cache = await caches.open(cacheName); + await cache.addAll(files); +} + +self.addEventListener("fetch", event => { + performance.mark("__serviceworker_event"); + console.log("Handling fetch event for", event.request.url); + event.respondWith(handleFetch(event.request)); +}); + +async function handleFetch(request) { + const cachedResponse = await caches.match(request); + if (cachedResponse) { + console.log("Found response in cache:", cachedResponse); + + return cachedResponse; + } + console.log("No response found in cache. 
About to fetch from network..."); + + const networkResponse = await fetch(request); + console.log("Response from network is:", networkResponse); + return networkResponse; +} diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js new file mode 100644 index 0000000000..f656665ca0 --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js @@ -0,0 +1,4 @@ +self.addEventListener("install", event => { + performance.mark("__serviceworker_event"); + console.log("[SW]:", "Install event"); +}); diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js new file mode 100644 index 0000000000..255c8269a1 --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js @@ -0,0 +1,9 @@ +self.addEventListener("install", event => { + performance.mark("__serviceworker_event"); + console.log("[SW]:", "Install event"); +}); + +self.addEventListener("fetch", event => { + performance.mark("__serviceworker_event"); + console.log("Handling fetch event for", event.request.url); +}); diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_page.html b/tools/profiler/tests/browser/serviceworkers/serviceworker_page.html new file mode 100644 index 0000000000..1c2100a9d6 --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_page.html @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_register.html b/tools/profiler/tests/browser/serviceworkers/serviceworker_register.html new file mode 100644 index 0000000000..86719787f4 --- /dev/null +++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_register.html @@ -0,0 +1,9 @@ + + + + + + + + + diff --git 
a/tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html b/tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html
new file mode 100644
index 0000000000..f7c32d02c3
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+ Testing
+
+
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js
new file mode 100644
index 0000000000..891b679a5f
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js
@@ -0,0 +1,34 @@
+// Service worker that answers requests whose URL ends with "-generated.svg"
+// with a response built in the worker, and forwards all others to the network.
+
+self.addEventListener("install", event => {
+  performance.mark("__serviceworker_event");
+  // dump() takes a single string: a second argument would be ignored, so the
+  // prefix, the message and the trailing newline are passed as one string.
+  dump("[SW]: Install event\n");
+});
+
+self.addEventListener("fetch", event => {
+  performance.mark("__serviceworker_event");
+  dump(`Handling fetch event for ${event.request.url}\n`);
+  event.respondWith(handleFetch(event.request));
+});
+
+// Returns the response for `request`: a new Response with an "image/svg+xml"
+// content type for "-generated.svg" URLs, otherwise the result of fetch().
+async function handleFetch(request) {
+  if (request.url.endsWith("-generated.svg")) {
+    dump(
+      "An icon file that should be generated was requested, let's answer directly.\n"
+    );
+    return new Response(
+      `firefox-logo-nightly`,
+      { headers: { "content-type": "image/svg+xml" } }
+    );
+  }
+
+  dump(
+    `A normal URL ${request.url} has been requested, let's fetch it from the network.\n`
+  );
+  return fetch(request);
+}
diff --git a/tools/profiler/tests/browser/simple.html b/tools/profiler/tests/browser/simple.html
new file mode 100644
index 0000000000..f7c32d02c3
--- /dev/null
+++ b/tools/profiler/tests/browser/simple.html
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+ Testing
+
+
diff --git a/tools/profiler/tests/browser/single_frame.html b/tools/profiler/tests/browser/single_frame.html
new file mode 100644
index 0000000000..ebdfc41da2
--- /dev/null
+++ b/tools/profiler/tests/browser/single_frame.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Single Frame
+
+
+ Single Frame
+
+
diff --git
a/tools/profiler/tests/chrome/chrome.ini b/tools/profiler/tests/chrome/chrome.ini new file mode 100644 index 0000000000..7089b8fb8e --- /dev/null +++ b/tools/profiler/tests/chrome/chrome.ini @@ -0,0 +1,8 @@ +[DEFAULT] +skip-if = tsan # Bug 1804081 +support-files=profiler_test_utils.js + +[test_profile_worker_bug_1428076.html] +skip-if = os == 'android' && processor == 'arm' # Bug 1541291 +[test_profile_worker.html] +skip-if = os == 'android' && processor == 'arm' # Bug 1541291 diff --git a/tools/profiler/tests/chrome/profiler_test_utils.js b/tools/profiler/tests/chrome/profiler_test_utils.js new file mode 100644 index 0000000000..d2e4499b34 --- /dev/null +++ b/tools/profiler/tests/chrome/profiler_test_utils.js @@ -0,0 +1,66 @@ +"use strict"; + +(function () { + async function startProfiler(settings) { + let startPromise = Services.profiler.StartProfiler( + settings.entries, + settings.interval, + settings.features, + settings.threads, + 0, + settings.duration + ); + + info("Parent Profiler has started"); + + await startPromise; + + info("Child profilers have started"); + } + + function getProfile() { + const profile = Services.profiler.getProfileData(); + info( + "We got a profile, run the mochitest with `--keep-open true` to see the logged profile in the Web Console." + ); + + // Run the mochitest with `--keep-open true` to see the logged profile in the + // Web console. 
+ console.log(profile); + + return profile; + } + + async function stopProfiler() { + let stopPromise = Services.profiler.StopProfiler(); + info("Parent profiler has stopped"); + await stopPromise; + info("Child profilers have stopped"); + } + + function end(error) { + if (error) { + ok(false, `We got an error: ${error}`); + } else { + ok(true, "We ran the whole process"); + } + SimpleTest.finish(); + } + + async function runTest(settings, workload) { + SimpleTest.waitForExplicitFinish(); + try { + await startProfiler(settings); + await workload(); + await getProfile(); + await stopProfiler(); + await end(); + } catch (e) { + // By catching and handling the error, we're being nice to mochitest + // runners: instead of waiting for the timeout, we fail right away. + await end(e); + } + } + + window.runTest = runTest; +})(); diff --git a/tools/profiler/tests/chrome/test_profile_worker.html b/tools/profiler/tests/chrome/test_profile_worker.html new file mode 100644 index 0000000000..8e2bae7fbd --- /dev/null +++ b/tools/profiler/tests/chrome/test_profile_worker.html @@ -0,0 +1,66 @@ + + + + + + Test for Bug 1428076 + + + + +Mozilla Bug 1428076 + + + + + + diff --git a/tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html b/tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html new file mode 100644 index 0000000000..abe0e5748a --- /dev/null +++ b/tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html @@ -0,0 +1,58 @@ + + + + + + Test for Bug 1428076 + + + + +Mozilla Bug 1428076 + + + + + + diff --git a/tools/profiler/tests/gtest/GeckoProfiler.cpp b/tools/profiler/tests/gtest/GeckoProfiler.cpp new file mode 100644 index 0000000000..78456662f5 --- /dev/null +++ b/tools/profiler/tests/gtest/GeckoProfiler.cpp @@ -0,0 +1,5099 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file tests a lot of the profiler_*() functions in GeckoProfiler.h. +// Most of the tests just check that nothing untoward (e.g. crashes, deadlocks) +// happens when calling these functions. They don't do much inspection of +// profiler internals. + +#include "mozilla/ProfilerThreadPlatformData.h" +#include "mozilla/ProfilerThreadRegistration.h" +#include "mozilla/ProfilerThreadRegistrationInfo.h" +#include "mozilla/ProfilerThreadRegistry.h" +#include "mozilla/ProfilerUtils.h" +#include "mozilla/ProgressLogger.h" +#include "mozilla/UniquePtrExtensions.h" + +#include "nsIThread.h" +#include "nsThreadUtils.h" +#include "prthread.h" + +#include "gtest/gtest.h" +#include "mozilla/gtest/MozAssertions.h" + +#include + +#if defined(_MSC_VER) || defined(__MINGW32__) +# include +# include +#elif defined(__APPLE__) +# include +#endif + +#ifdef XP_WIN +#include "mozilla/WindowsVersion.h" +#endif + +#ifdef MOZ_GECKO_PROFILER + +# include "GeckoProfiler.h" +# include "mozilla/ProfilerMarkerTypes.h" +# include "mozilla/ProfilerMarkers.h" +# include "NetworkMarker.h" +# include "platform.h" +# include "ProfileBuffer.h" +# include "ProfilerControl.h" + +# include "js/Initialization.h" +# include "js/Printf.h" +# include "jsapi.h" +# include "json/json.h" +# include "mozilla/Atomics.h" +# include "mozilla/BlocksRingBuffer.h" +# include "mozilla/DataMutex.h" +# include "mozilla/ProfileBufferEntrySerializationGeckoExtensions.h" +# include "mozilla/ProfileJSONWriter.h" +# include "mozilla/ScopeExit.h" +# include "mozilla/net/HttpBaseChannel.h" +# include "nsIChannelEventSink.h" +# include "nsIThread.h" +# include "nsThreadUtils.h" + +# include +# include + +#endif // MOZ_GECKO_PROFILER + +// Note: profiler_init() has already been called in XRE_main(), so we can't +// test it here. 
Likewise for profiler_shutdown(), and AutoProfilerInit +// (which is just an RAII wrapper for profiler_init() and profiler_shutdown()). + +using namespace mozilla; + +TEST(GeckoProfiler, ProfilerUtils) +{ + profiler_init_main_thread_id(); + + static_assert(std::is_same_v); + static_assert( + std::is_same_v); + ProfilerProcessId processId = profiler_current_process_id(); + EXPECT_TRUE(processId.IsSpecified()); + EXPECT_EQ(processId, baseprofiler::profiler_current_process_id()); + + static_assert( + std::is_same_v); + static_assert( + std::is_same_v); + EXPECT_EQ(profiler_current_thread_id(), + baseprofiler::profiler_current_thread_id()); + + ProfilerThreadId mainTestThreadId = profiler_current_thread_id(); + EXPECT_TRUE(mainTestThreadId.IsSpecified()); + + ProfilerThreadId mainThreadId = profiler_main_thread_id(); + EXPECT_TRUE(mainThreadId.IsSpecified()); + + EXPECT_EQ(mainThreadId, mainTestThreadId) + << "Test should run on the main thread"; + EXPECT_TRUE(profiler_is_main_thread()); + + std::thread testThread([&]() { + EXPECT_EQ(profiler_current_process_id(), processId); + + const ProfilerThreadId testThreadId = profiler_current_thread_id(); + EXPECT_TRUE(testThreadId.IsSpecified()); + EXPECT_NE(testThreadId, mainThreadId); + EXPECT_FALSE(profiler_is_main_thread()); + + EXPECT_EQ(baseprofiler::profiler_current_process_id(), processId); + EXPECT_EQ(baseprofiler::profiler_current_thread_id(), testThreadId); + EXPECT_EQ(baseprofiler::profiler_main_thread_id(), mainThreadId); + EXPECT_FALSE(baseprofiler::profiler_is_main_thread()); + }); + testThread.join(); +} + +TEST(GeckoProfiler, ThreadRegistrationInfo) +{ + profiler_init_main_thread_id(); + + TimeStamp ts = TimeStamp::Now(); + { + profiler::ThreadRegistrationInfo trInfo{ + "name", ProfilerThreadId::FromNumber(123), false, ts}; + EXPECT_STREQ(trInfo.Name(), "name"); + EXPECT_NE(trInfo.Name(), "name") + << "ThreadRegistrationInfo should keep its own copy of the name"; + EXPECT_EQ(trInfo.RegisterTime(), ts); + 
EXPECT_EQ(trInfo.ThreadId(), ProfilerThreadId::FromNumber(123)); + EXPECT_EQ(trInfo.IsMainThread(), false); + } + + // Make sure the next timestamp will be different from `ts`. + while (TimeStamp::Now() == ts) { + } + + { + profiler::ThreadRegistrationInfo trInfoHere{"Here"}; + EXPECT_STREQ(trInfoHere.Name(), "Here"); + EXPECT_NE(trInfoHere.Name(), "Here") + << "ThreadRegistrationInfo should keep its own copy of the name"; + TimeStamp baseRegistrationTime = + baseprofiler::detail::GetThreadRegistrationTime(); + if (baseRegistrationTime) { + EXPECT_EQ(trInfoHere.RegisterTime(), baseRegistrationTime); + } else { + EXPECT_GT(trInfoHere.RegisterTime(), ts); + } + EXPECT_EQ(trInfoHere.ThreadId(), profiler_current_thread_id()); + EXPECT_EQ(trInfoHere.ThreadId(), profiler_main_thread_id()) + << "Gtests are assumed to run on the main thread"; + EXPECT_EQ(trInfoHere.IsMainThread(), true) + << "Gtests are assumed to run on the main thread"; + } + + { + // Sub-thread test. + // These will receive sub-thread data (to test move at thread end). 
+ TimeStamp tsThread; + ProfilerThreadId threadThreadId; + UniquePtr trInfoThreadPtr; + + std::thread testThread([&]() { + profiler::ThreadRegistrationInfo trInfoThread{"Thread"}; + EXPECT_STREQ(trInfoThread.Name(), "Thread"); + EXPECT_NE(trInfoThread.Name(), "Thread") + << "ThreadRegistrationInfo should keep its own copy of the name"; + EXPECT_GT(trInfoThread.RegisterTime(), ts); + EXPECT_EQ(trInfoThread.ThreadId(), profiler_current_thread_id()); + EXPECT_NE(trInfoThread.ThreadId(), profiler_main_thread_id()); + EXPECT_EQ(trInfoThread.IsMainThread(), false); + + tsThread = trInfoThread.RegisterTime(); + threadThreadId = trInfoThread.ThreadId(); + trInfoThreadPtr = + MakeUnique(std::move(trInfoThread)); + }); + testThread.join(); + + ASSERT_NE(trInfoThreadPtr, nullptr); + EXPECT_STREQ(trInfoThreadPtr->Name(), "Thread"); + EXPECT_EQ(trInfoThreadPtr->RegisterTime(), tsThread); + EXPECT_EQ(trInfoThreadPtr->ThreadId(), threadThreadId); + EXPECT_EQ(trInfoThreadPtr->IsMainThread(), false) + << "Gtests are assumed to run on the main thread"; + } +} + +static constexpr ThreadProfilingFeatures scEachAndAnyThreadProfilingFeatures[] = + {ThreadProfilingFeatures::CPUUtilization, ThreadProfilingFeatures::Sampling, + ThreadProfilingFeatures::Markers, ThreadProfilingFeatures::Any}; + +TEST(GeckoProfiler, ThreadProfilingFeaturesType) +{ + ASSERT_EQ(static_cast(ThreadProfilingFeatures::Any), 1u + 2u + 4u) + << "This test assumes that there are 3 binary choices 1+2+4; " + "Is this test up to date?"; + + EXPECT_EQ(Combine(ThreadProfilingFeatures::CPUUtilization, + ThreadProfilingFeatures::Sampling, + ThreadProfilingFeatures::Markers), + ThreadProfilingFeatures::Any); + + constexpr ThreadProfilingFeatures allThreadProfilingFeatures[] = { + ThreadProfilingFeatures::NotProfiled, + ThreadProfilingFeatures::CPUUtilization, + ThreadProfilingFeatures::Sampling, ThreadProfilingFeatures::Markers, + ThreadProfilingFeatures::Any}; + + for (ThreadProfilingFeatures f1 : 
allThreadProfilingFeatures) { + // Combine and Intersect are commutative. + for (ThreadProfilingFeatures f2 : allThreadProfilingFeatures) { + EXPECT_EQ(Combine(f1, f2), Combine(f2, f1)); + EXPECT_EQ(Intersect(f1, f2), Intersect(f2, f1)); + } + + // Combine works like OR. + EXPECT_EQ(Combine(f1, f1), f1); + EXPECT_EQ(Combine(f1, f1, f1), f1); + + // 'OR NotProfiled' doesn't change anything. + EXPECT_EQ(Combine(f1, ThreadProfilingFeatures::NotProfiled), f1); + + // 'OR Any' makes Any. + EXPECT_EQ(Combine(f1, ThreadProfilingFeatures::Any), + ThreadProfilingFeatures::Any); + + // Intersect works like AND. + EXPECT_EQ(Intersect(f1, f1), f1); + EXPECT_EQ(Intersect(f1, f1, f1), f1); + + // 'AND NotProfiled' erases anything. + EXPECT_EQ(Intersect(f1, ThreadProfilingFeatures::NotProfiled), + ThreadProfilingFeatures::NotProfiled); + + // 'AND Any' doesn't change anything. + EXPECT_EQ(Intersect(f1, ThreadProfilingFeatures::Any), f1); + } + + for (ThreadProfilingFeatures f1 : scEachAndAnyThreadProfilingFeatures) { + EXPECT_TRUE(DoFeaturesIntersect(f1, f1)); + + // NotProfiled doesn't intersect with any feature. + EXPECT_FALSE(DoFeaturesIntersect(f1, ThreadProfilingFeatures::NotProfiled)); + + // Any intersects with any feature. 
+ EXPECT_TRUE(DoFeaturesIntersect(f1, ThreadProfilingFeatures::Any)); + } +} + +static void TestConstUnlockedConstReader( + const profiler::ThreadRegistration::UnlockedConstReader& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + EXPECT_STREQ(aData.Info().Name(), "Test thread"); + EXPECT_GE(aData.Info().RegisterTime(), aBeforeRegistration); + EXPECT_LE(aData.Info().RegisterTime(), aAfterRegistration); + EXPECT_EQ(aData.Info().ThreadId(), aThreadId); + EXPECT_FALSE(aData.Info().IsMainThread()); + +#if (defined(_MSC_VER) || defined(__MINGW32__)) && defined(MOZ_GECKO_PROFILER) + HANDLE threadHandle = aData.PlatformDataCRef().ProfiledThread(); + EXPECT_NE(threadHandle, nullptr); + EXPECT_EQ(ProfilerThreadId::FromNumber(::GetThreadId(threadHandle)), + aThreadId); + // Test calling QueryThreadCycleTime, we cannot assume that it will always + // work, but at least it shouldn't crash. + ULONG64 cycles; + (void)QueryThreadCycleTime(threadHandle, &cycles); +#elif defined(__APPLE__) && defined(MOZ_GECKO_PROFILER) + // Test calling thread_info, we cannot assume that it will always work, but at + // least it shouldn't crash. + thread_basic_info_data_t threadBasicInfo; + mach_msg_type_number_t basicCount = THREAD_BASIC_INFO_COUNT; + (void)thread_info( + aData.PlatformDataCRef().ProfiledThread(), THREAD_BASIC_INFO, + reinterpret_cast(&threadBasicInfo), &basicCount); +#elif (defined(__linux__) || defined(__ANDROID__) || defined(__FreeBSD__)) && \ + defined(MOZ_GECKO_PROFILER) + // Test calling GetClockId, we cannot assume that it will always work, but at + // least it shouldn't crash. + Maybe maybeClockId = aData.PlatformDataCRef().GetClockId(); + if (maybeClockId) { + // Test calling clock_gettime, we cannot assume that it will always work, + // but at least it shouldn't crash. 
+ timespec ts; + (void)clock_gettime(*maybeClockId, &ts); + } +#else + (void)aData.PlatformDataCRef(); +#endif + + EXPECT_GE(aData.StackTop(), aOnStackObject) + << "StackTop should be at &onStackChar, or higher on some " + "platforms"; +}; + +static void TestConstUnlockedConstReaderAndAtomicRW( + const profiler::ThreadRegistration::UnlockedConstReaderAndAtomicRW& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedConstReader(aData, aBeforeRegistration, aAfterRegistration, + aOnStackObject, aThreadId); + + (void)aData.ProfilingStackCRef(); + + EXPECT_EQ(aData.ProfilingFeatures(), ThreadProfilingFeatures::NotProfiled); + + EXPECT_FALSE(aData.IsSleeping()); +}; + +static void TestUnlockedConstReaderAndAtomicRW( + profiler::ThreadRegistration::UnlockedConstReaderAndAtomicRW& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedConstReaderAndAtomicRW(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + (void)aData.ProfilingStackRef(); + + EXPECT_FALSE(aData.IsSleeping()); + aData.SetSleeping(); + EXPECT_TRUE(aData.IsSleeping()); + aData.SetAwake(); + EXPECT_FALSE(aData.IsSleeping()); + + aData.ReinitializeOnResume(); + + EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep()); + EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep()); + aData.SetSleeping(); + // After sleeping, the 2nd+ calls can duplicate. + EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep()); + EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep()); + EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep()); + aData.ReinitializeOnResume(); + // After reinit (and sleeping), the 2nd+ calls can duplicate. 
+ EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep()); + EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep()); + EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep()); + aData.SetAwake(); + EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep()); + EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep()); +}; + +static void TestConstUnlockedRWForLockedProfiler( + const profiler::ThreadRegistration::UnlockedRWForLockedProfiler& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedConstReaderAndAtomicRW(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + // We can't create a PSAutoLock here, so just verify that the call would + // compile and return the expected type. + static_assert(std::is_same_v())), + const ProfiledThreadData*>); +}; + +static void TestConstUnlockedReaderAndAtomicRWOnThread( + const profiler::ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& + aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + EXPECT_EQ(aData.GetJSContext(), nullptr); +}; + +static void TestUnlockedRWForLockedProfiler( + profiler::ThreadRegistration::UnlockedRWForLockedProfiler& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + TestUnlockedConstReaderAndAtomicRW(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + // No functions to test here. 
+}; + +static void TestUnlockedReaderAndAtomicRWOnThread( + profiler::ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + TestUnlockedRWForLockedProfiler(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + // No functions to test here. +}; + +static void TestConstLockedRWFromAnyThread( + const profiler::ThreadRegistration::LockedRWFromAnyThread& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + EXPECT_EQ(aData.GetJsFrameBuffer(), nullptr); + EXPECT_EQ(aData.GetEventTarget(), nullptr); +}; + +static void TestLockedRWFromAnyThread( + profiler::ThreadRegistration::LockedRWFromAnyThread& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstLockedRWFromAnyThread(aData, aBeforeRegistration, aAfterRegistration, + aOnStackObject, aThreadId); + TestUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration, + aAfterRegistration, aOnStackObject, + aThreadId); + + // We can't create a ProfiledThreadData nor PSAutoLock here, so just verify + // that the call would compile and return the expected type. 
+ static_assert(std::is_same_v(), + std::declval(), + std::declval())), + void>); + + aData.ResetMainThread(nullptr); + + TimeDuration delay = TimeDuration::FromSeconds(1); + TimeDuration running = TimeDuration::FromSeconds(1); + aData.GetRunningEventDelay(TimeStamp::Now(), delay, running); + EXPECT_TRUE(delay.IsZero()); + EXPECT_TRUE(running.IsZero()); + + aData.StartJSSampling(123u); + aData.StopJSSampling(); +}; + +static void TestConstLockedRWOnThread( + const profiler::ThreadRegistration::LockedRWOnThread& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstLockedRWFromAnyThread(aData, aBeforeRegistration, aAfterRegistration, + aOnStackObject, aThreadId); + + // No functions to test here. +}; + +static void TestLockedRWOnThread( + profiler::ThreadRegistration::LockedRWOnThread& aData, + const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration, + const void* aOnStackObject, + ProfilerThreadId aThreadId = profiler_current_thread_id()) { + TestConstLockedRWOnThread(aData, aBeforeRegistration, aAfterRegistration, + aOnStackObject, aThreadId); + TestLockedRWFromAnyThread(aData, aBeforeRegistration, aAfterRegistration, + aOnStackObject, aThreadId); + + // We don't want to really call SetJSContext here, so just verify that + // the call would compile and return the expected type. + static_assert( + std::is_same_v())), + void>); + aData.ClearJSContext(); + aData.PollJSSampling(); +}; + +TEST(GeckoProfiler, ThreadRegistration_DataAccess) +{ + using TR = profiler::ThreadRegistration; + + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the main thread"; + + // Note that the main thread could already be registered, so we work in a new + // thread to test an actual registration that we control. 
+ + std::thread testThread([&]() { + ASSERT_FALSE(TR::IsRegistered()) + << "A new std::thread should not start registered"; + EXPECT_FALSE(TR::GetOnThreadPtr()); + EXPECT_FALSE(TR::WithOnThreadRefOr([&](auto) { return true; }, false)); + + char onStackChar; + + TimeStamp beforeRegistration = TimeStamp::Now(); + TR tr{"Test thread", &onStackChar}; + TimeStamp afterRegistration = TimeStamp::Now(); + + ASSERT_TRUE(TR::IsRegistered()); + + // Note: This test will mostly be about checking the correct access to + // thread data, depending on how it's obtained. Not all the functionality + // related to that data is tested (e.g., because it involves JS or other + // external dependencies that would be difficult to control here.) + + auto TestOnThreadRef = [&](TR::OnThreadRef aOnThreadRef) { + // To test const-qualified member functions. + const TR::OnThreadRef& onThreadCRef = aOnThreadRef; + + // const UnlockedConstReader (always const) + + TestConstUnlockedConstReader(onThreadCRef.UnlockedConstReaderCRef(), + beforeRegistration, afterRegistration, + &onStackChar); + onThreadCRef.WithUnlockedConstReader( + [&](const TR::UnlockedConstReader& aData) { + TestConstUnlockedConstReader(aData, beforeRegistration, + afterRegistration, &onStackChar); + }); + + // const UnlockedConstReaderAndAtomicRW + + TestConstUnlockedConstReaderAndAtomicRW( + onThreadCRef.UnlockedConstReaderAndAtomicRWCRef(), beforeRegistration, + afterRegistration, &onStackChar); + onThreadCRef.WithUnlockedConstReaderAndAtomicRW( + [&](const TR::UnlockedConstReaderAndAtomicRW& aData) { + TestConstUnlockedConstReaderAndAtomicRW( + aData, beforeRegistration, afterRegistration, &onStackChar); + }); + + // non-const UnlockedConstReaderAndAtomicRW + + TestUnlockedConstReaderAndAtomicRW( + aOnThreadRef.UnlockedConstReaderAndAtomicRWRef(), beforeRegistration, + afterRegistration, &onStackChar); + aOnThreadRef.WithUnlockedConstReaderAndAtomicRW( + [&](TR::UnlockedConstReaderAndAtomicRW& aData) { + 
TestUnlockedConstReaderAndAtomicRW(aData, beforeRegistration, + afterRegistration, &onStackChar); + }); + + // const UnlockedRWForLockedProfiler + + TestConstUnlockedRWForLockedProfiler( + onThreadCRef.UnlockedRWForLockedProfilerCRef(), beforeRegistration, + afterRegistration, &onStackChar); + onThreadCRef.WithUnlockedRWForLockedProfiler( + [&](const TR::UnlockedRWForLockedProfiler& aData) { + TestConstUnlockedRWForLockedProfiler( + aData, beforeRegistration, afterRegistration, &onStackChar); + }); + + // non-const UnlockedRWForLockedProfiler + + TestUnlockedRWForLockedProfiler( + aOnThreadRef.UnlockedRWForLockedProfilerRef(), beforeRegistration, + afterRegistration, &onStackChar); + aOnThreadRef.WithUnlockedRWForLockedProfiler( + [&](TR::UnlockedRWForLockedProfiler& aData) { + TestUnlockedRWForLockedProfiler(aData, beforeRegistration, + afterRegistration, &onStackChar); + }); + + // const UnlockedReaderAndAtomicRWOnThread + + TestConstUnlockedReaderAndAtomicRWOnThread( + onThreadCRef.UnlockedReaderAndAtomicRWOnThreadCRef(), + beforeRegistration, afterRegistration, &onStackChar); + onThreadCRef.WithUnlockedReaderAndAtomicRWOnThread( + [&](const TR::UnlockedReaderAndAtomicRWOnThread& aData) { + TestConstUnlockedReaderAndAtomicRWOnThread( + aData, beforeRegistration, afterRegistration, &onStackChar); + }); + + // non-const UnlockedReaderAndAtomicRWOnThread + + TestUnlockedReaderAndAtomicRWOnThread( + aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadRef(), + beforeRegistration, afterRegistration, &onStackChar); + aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread( + [&](TR::UnlockedReaderAndAtomicRWOnThread& aData) { + TestUnlockedReaderAndAtomicRWOnThread( + aData, beforeRegistration, afterRegistration, &onStackChar); + }); + + // LockedRWFromAnyThread + // Note: It cannot directly be accessed on the thread, this will be + // tested through LockedRWOnThread. 
+ + // const LockedRWOnThread + + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + { + TR::OnThreadRef::ConstRWOnThreadWithLock constRWOnThreadWithLock = + onThreadCRef.ConstLockedRWOnThread(); + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + TestConstLockedRWOnThread(constRWOnThreadWithLock.DataCRef(), + beforeRegistration, afterRegistration, + &onStackChar); + } + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + onThreadCRef.WithConstLockedRWOnThread( + [&](const TR::LockedRWOnThread& aData) { + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + TestConstLockedRWOnThread(aData, beforeRegistration, + afterRegistration, &onStackChar); + }); + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + + // non-const LockedRWOnThread + + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + { + TR::OnThreadRef::RWOnThreadWithLock rwOnThreadWithLock = + aOnThreadRef.GetLockedRWOnThread(); + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + TestConstLockedRWOnThread(rwOnThreadWithLock.DataCRef(), + beforeRegistration, afterRegistration, + &onStackChar); + TestLockedRWOnThread(rwOnThreadWithLock.DataRef(), beforeRegistration, + afterRegistration, &onStackChar); + } + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + aOnThreadRef.WithLockedRWOnThread([&](TR::LockedRWOnThread& aData) { + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + TestLockedRWOnThread(aData, beforeRegistration, afterRegistration, + &onStackChar); + }); + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + }; + + TR::OnThreadPtr onThreadPtr = TR::GetOnThreadPtr(); + ASSERT_TRUE(onThreadPtr); + TestOnThreadRef(*onThreadPtr); + + TR::WithOnThreadRef( + [&](TR::OnThreadRef aOnThreadRef) { TestOnThreadRef(aOnThreadRef); }); + + EXPECT_TRUE(TR::WithOnThreadRefOr( + [&](TR::OnThreadRef aOnThreadRef) { + TestOnThreadRef(aOnThreadRef); + return true; + }, + false)); + }); + testThread.join(); +} + +// Thread name if registered, nullptr otherwise. 
+static const char* GetThreadName() { + return profiler::ThreadRegistration::WithOnThreadRefOr( + [](profiler::ThreadRegistration::OnThreadRef onThreadRef) { + return onThreadRef.WithUnlockedConstReader( + [](const profiler::ThreadRegistration::UnlockedConstReader& aData) { + return aData.Info().Name(); + }); + }, + nullptr); +} + +// Get the thread name, as registered in the PRThread, nullptr on failure. +static const char* GetPRThreadName() { + nsIThread* nsThread = NS_GetCurrentThread(); + if (!nsThread) { + return nullptr; + } + PRThread* prThread = nullptr; + if (NS_FAILED(nsThread->GetPRThread(&prThread))) { + return nullptr; + } + if (!prThread) { + return nullptr; + } + return PR_GetThreadName(prThread); +} + +TEST(GeckoProfiler, ThreadRegistration_MainThreadName) +{ + EXPECT_TRUE(profiler::ThreadRegistration::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "GeckoMain"); + + // Check that the real thread name (outside the profiler) is *not* GeckoMain. + EXPECT_STRNE(GetPRThreadName(), "GeckoMain"); +} + +TEST(GeckoProfiler, ThreadRegistration_NestedRegistrations) +{ + using TR = profiler::ThreadRegistration; + + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the main thread"; + + // Note that the main thread could already be registered, so we work in a new + // thread to test actual registrations that we control. + + std::thread testThread([&]() { + ASSERT_FALSE(TR::IsRegistered()) + << "A new std::thread should not start registered"; + + char onStackChar; + + // Blocks {} are mostly for clarity, but some control on-stack registration + // lifetimes. + + // On-stack registration. + { + TR rt{"Test thread #1", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #1"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #1"); + } + ASSERT_FALSE(TR::IsRegistered()); + + // Off-stack registration. 
+ { + TR::RegisterThread("Test thread #2", &onStackChar); + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #2"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #2"); + + TR::UnregisterThread(); + ASSERT_FALSE(TR::IsRegistered()); + } + + // Extra un-registration should be ignored. + TR::UnregisterThread(); + ASSERT_FALSE(TR::IsRegistered()); + + // Nested on-stack. + { + TR rt2{"Test thread #3", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #3"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #3"); + + { + TR rt3{"Test thread #4", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #3") + << "Nested registration shouldn't change the name"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #3") + << "Nested registration shouldn't change the PRThread name"; + } + ASSERT_TRUE(TR::IsRegistered()) + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetThreadName(), "Test thread #3") + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #3"); + } + ASSERT_FALSE(TR::IsRegistered()); + + // Nested off-stack. 
+ { + TR::RegisterThread("Test thread #5", &onStackChar); + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #5"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #5"); + + { + TR::RegisterThread("Test thread #6", &onStackChar); + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #5") + << "Nested registration shouldn't change the name"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #5") + << "Nested registration shouldn't change the PRThread name"; + + TR::UnregisterThread(); + ASSERT_TRUE(TR::IsRegistered()) + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetThreadName(), "Test thread #5") + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #5"); + } + + TR::UnregisterThread(); + ASSERT_FALSE(TR::IsRegistered()); + } + + // Nested on- and off-stack. + { + TR rt2{"Test thread #7", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #7"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #7"); + + { + TR::RegisterThread("Test thread #8", &onStackChar); + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #7") + << "Nested registration shouldn't change the name"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #7") + << "Nested registration shouldn't change the PRThread name"; + + TR::UnregisterThread(); + ASSERT_TRUE(TR::IsRegistered()) + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetThreadName(), "Test thread #7") + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #7"); + } + } + ASSERT_FALSE(TR::IsRegistered()); + + // Nested off- and on-stack. 
+ { + TR::RegisterThread("Test thread #9", &onStackChar); + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #9"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #9"); + + { + TR rt3{"Test thread #10", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #9") + << "Nested registration shouldn't change the name"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #9") + << "Nested registration shouldn't change the PRThread name"; + } + ASSERT_TRUE(TR::IsRegistered()) + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetThreadName(), "Test thread #9") + << "Thread should still be registered after nested un-registration"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #9"); + + TR::UnregisterThread(); + ASSERT_FALSE(TR::IsRegistered()); + } + + // Excess UnregisterThread with on-stack TR. + { + TR rt2{"Test thread #11", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #11"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #11"); + + TR::UnregisterThread(); + ASSERT_TRUE(TR::IsRegistered()) + << "On-stack thread should still be registered after off-stack " + "un-registration"; + EXPECT_STREQ(GetThreadName(), "Test thread #11") + << "On-stack thread should still be registered after off-stack " + "un-registration"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #11"); + } + ASSERT_FALSE(TR::IsRegistered()); + + // Excess on-thread TR destruction with already-unregistered root off-thread + // registration. 
+ { + TR::RegisterThread("Test thread #12", &onStackChar); + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #12"); + EXPECT_STREQ(GetPRThreadName(), "Test thread #12"); + + { + TR rt3{"Test thread #13", &onStackChar}; + ASSERT_TRUE(TR::IsRegistered()); + EXPECT_STREQ(GetThreadName(), "Test thread #12") + << "Nested registration shouldn't change the name"; + EXPECT_STREQ(GetPRThreadName(), "Test thread #12") + << "Nested registration shouldn't change the PRThread name"; + + // Note that we unregister the root registration, while nested `rt3` is + // still alive. + TR::UnregisterThread(); + ASSERT_FALSE(TR::IsRegistered()) + << "UnregisterThread() of the root RegisterThread() should always work"; + + // At this end of this block, `rt3` will be destroyed, but nothing + // should happen. + } + ASSERT_FALSE(TR::IsRegistered()); + } + + ASSERT_FALSE(TR::IsRegistered()); + }); + testThread.join(); +} + +TEST(GeckoProfiler, ThreadRegistry_DataAccess) +{ + using TR = profiler::ThreadRegistration; + using TRy = profiler::ThreadRegistry; + + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the main thread"; + + // Note that the main thread could already be registered, so we work in a new + // thread to test an actual registration that we control. + + std::thread testThread([&]() { + ASSERT_FALSE(TR::IsRegistered()) + << "A new std::thread should not start registered"; + EXPECT_FALSE(TR::GetOnThreadPtr()); + EXPECT_FALSE(TR::WithOnThreadRefOr([&](auto) { return true; }, false)); + + char onStackChar; + + TimeStamp beforeRegistration = TimeStamp::Now(); + TR tr{"Test thread", &onStackChar}; + TimeStamp afterRegistration = TimeStamp::Now(); + + ASSERT_TRUE(TR::IsRegistered()); + + // Note: This test will mostly be about checking the correct access to + // thread data, depending on how it's obtained. 
Not all the functionality + // related to that data is tested (e.g., because it involves JS or other + // external dependencies that would be difficult to control here.) + + const ProfilerThreadId testThreadId = profiler_current_thread_id(); + + auto testThroughRegistry = [&]() { + auto TestOffThreadRef = [&](TRy::OffThreadRef aOffThreadRef) { + // To test const-qualified member functions. + const TRy::OffThreadRef& offThreadCRef = aOffThreadRef; + + // const UnlockedConstReader (always const) + + TestConstUnlockedConstReader(offThreadCRef.UnlockedConstReaderCRef(), + beforeRegistration, afterRegistration, + &onStackChar, testThreadId); + offThreadCRef.WithUnlockedConstReader( + [&](const TR::UnlockedConstReader& aData) { + TestConstUnlockedConstReader(aData, beforeRegistration, + afterRegistration, &onStackChar, + testThreadId); + }); + + // const UnlockedConstReaderAndAtomicRW + + TestConstUnlockedConstReaderAndAtomicRW( + offThreadCRef.UnlockedConstReaderAndAtomicRWCRef(), + beforeRegistration, afterRegistration, &onStackChar, testThreadId); + offThreadCRef.WithUnlockedConstReaderAndAtomicRW( + [&](const TR::UnlockedConstReaderAndAtomicRW& aData) { + TestConstUnlockedConstReaderAndAtomicRW( + aData, beforeRegistration, afterRegistration, &onStackChar, + testThreadId); + }); + + // non-const UnlockedConstReaderAndAtomicRW + + TestUnlockedConstReaderAndAtomicRW( + aOffThreadRef.UnlockedConstReaderAndAtomicRWRef(), + beforeRegistration, afterRegistration, &onStackChar, testThreadId); + aOffThreadRef.WithUnlockedConstReaderAndAtomicRW( + [&](TR::UnlockedConstReaderAndAtomicRW& aData) { + TestUnlockedConstReaderAndAtomicRW(aData, beforeRegistration, + afterRegistration, + &onStackChar, testThreadId); + }); + + // const UnlockedRWForLockedProfiler + + TestConstUnlockedRWForLockedProfiler( + offThreadCRef.UnlockedRWForLockedProfilerCRef(), beforeRegistration, + afterRegistration, &onStackChar, testThreadId); + offThreadCRef.WithUnlockedRWForLockedProfiler( + [&](const 
TR::UnlockedRWForLockedProfiler& aData) { + TestConstUnlockedRWForLockedProfiler(aData, beforeRegistration, + afterRegistration, + &onStackChar, testThreadId); + }); + + // non-const UnlockedRWForLockedProfiler + + TestUnlockedRWForLockedProfiler( + aOffThreadRef.UnlockedRWForLockedProfilerRef(), beforeRegistration, + afterRegistration, &onStackChar, testThreadId); + aOffThreadRef.WithUnlockedRWForLockedProfiler( + [&](TR::UnlockedRWForLockedProfiler& aData) { + TestUnlockedRWForLockedProfiler(aData, beforeRegistration, + afterRegistration, &onStackChar, + testThreadId); + }); + + // UnlockedReaderAndAtomicRWOnThread + // Note: It cannot directly be accessed off the thread, this will be + // tested through LockedRWFromAnyThread. + + // const LockedRWFromAnyThread + + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + { + TRy::OffThreadRef::ConstRWFromAnyThreadWithLock + constRWFromAnyThreadWithLock = + offThreadCRef.ConstLockedRWFromAnyThread(); + if (profiler_current_thread_id() == testThreadId) { + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + } + TestConstLockedRWFromAnyThread( + constRWFromAnyThreadWithLock.DataCRef(), beforeRegistration, + afterRegistration, &onStackChar, testThreadId); + } + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + offThreadCRef.WithConstLockedRWFromAnyThread( + [&](const TR::LockedRWFromAnyThread& aData) { + if (profiler_current_thread_id() == testThreadId) { + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + } + TestConstLockedRWFromAnyThread(aData, beforeRegistration, + afterRegistration, &onStackChar, + testThreadId); + }); + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + + // non-const LockedRWFromAnyThread + + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + { + TRy::OffThreadRef::RWFromAnyThreadWithLock rwFromAnyThreadWithLock = + aOffThreadRef.GetLockedRWFromAnyThread(); + if (profiler_current_thread_id() == testThreadId) { + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + } + 
TestLockedRWFromAnyThread(rwFromAnyThreadWithLock.DataRef(), + beforeRegistration, afterRegistration, + &onStackChar, testThreadId); + } + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + aOffThreadRef.WithLockedRWFromAnyThread( + [&](TR::LockedRWFromAnyThread& aData) { + if (profiler_current_thread_id() == testThreadId) { + EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread()); + } + TestLockedRWFromAnyThread(aData, beforeRegistration, + afterRegistration, &onStackChar, + testThreadId); + }); + EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread()); + + // LockedRWOnThread + // Note: It can never be accessed off the thread. + }; + + int ranTest = 0; + TRy::WithOffThreadRef(testThreadId, [&](TRy::OffThreadRef aOffThreadRef) { + TestOffThreadRef(aOffThreadRef); + ++ranTest; + }); + EXPECT_EQ(ranTest, 1); + + EXPECT_TRUE(TRy::WithOffThreadRefOr( + testThreadId, + [&](TRy::OffThreadRef aOffThreadRef) { + TestOffThreadRef(aOffThreadRef); + return true; + }, + false)); + + ranTest = 0; + EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread()); + for (TRy::OffThreadRef offThreadRef : TRy::LockedRegistry{}) { + EXPECT_TRUE(TRy::IsRegistryMutexLockedOnCurrentThread() || + !TR::IsRegistered()); + if (offThreadRef.UnlockedConstReaderCRef().Info().ThreadId() == + testThreadId) { + TestOffThreadRef(offThreadRef); + ++ranTest; + } + } + EXPECT_EQ(ranTest, 1); + EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread()); + + { + ranTest = 0; + EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread()); + TRy::LockedRegistry lockedRegistry{}; + EXPECT_TRUE(TRy::IsRegistryMutexLockedOnCurrentThread() || + !TR::IsRegistered()); + for (TRy::OffThreadRef offThreadRef : lockedRegistry) { + if (offThreadRef.UnlockedConstReaderCRef().Info().ThreadId() == + testThreadId) { + TestOffThreadRef(offThreadRef); + ++ranTest; + } + } + EXPECT_EQ(ranTest, 1); + } + EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread()); + }; + + // Test on the current thread. 
+ testThroughRegistry(); + + // Test from another thread. + std::thread otherThread([&]() { + ASSERT_NE(profiler_current_thread_id(), testThreadId); + testThroughRegistry(); + + // Test that this unregistered thread is really not registered. + int ranTest = 0; + TRy::WithOffThreadRef( + profiler_current_thread_id(), + [&](TRy::OffThreadRef aOffThreadRef) { ++ranTest; }); + EXPECT_EQ(ranTest, 0); + + EXPECT_FALSE(TRy::WithOffThreadRefOr( + profiler_current_thread_id(), + [&](TRy::OffThreadRef aOffThreadRef) { + ++ranTest; + return true; + }, + false)); + EXPECT_EQ(ranTest, 0); + }); + otherThread.join(); + }); + testThread.join(); +} + +TEST(GeckoProfiler, ThreadRegistration_RegistrationEdgeCases) +{ + using TR = profiler::ThreadRegistration; + using TRy = profiler::ThreadRegistry; + + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the main thread"; + + // Note that the main thread could already be registered, so we work in a new + // thread to test an actual registration that we control. + + int registrationCount = 0; + int otherThreadLoops = 0; + int otherThreadReads = 0; + + // This thread will register and unregister in a loop, with some pauses. + // Another thread will attempty to access the test thread, and lock its data. + // The main goal is to check edges cases around (un)registrations. + std::thread testThread([&]() { + const ProfilerThreadId testThreadId = profiler_current_thread_id(); + + const TimeStamp endTestAt = TimeStamp::Now() + TimeDuration::FromSeconds(1); + + std::thread otherThread([&]() { + // Initial sleep so that testThread can start its loop. 
+ PR_Sleep(PR_MillisecondsToInterval(1)); + + while (TimeStamp::Now() < endTestAt) { + ++otherThreadLoops; + + TRy::WithOffThreadRef(testThreadId, [&](TRy::OffThreadRef + aOffThreadRef) { + if (otherThreadLoops % 1000 == 0) { + PR_Sleep(PR_MillisecondsToInterval(1)); + } + TRy::OffThreadRef::RWFromAnyThreadWithLock rwFromAnyThreadWithLock = + aOffThreadRef.GetLockedRWFromAnyThread(); + ++otherThreadReads; + if (otherThreadReads % 1000 == 0) { + PR_Sleep(PR_MillisecondsToInterval(1)); + } + }); + } + }); + + while (TimeStamp::Now() < endTestAt) { + ASSERT_FALSE(TR::IsRegistered()) + << "A new std::thread should not start registered"; + EXPECT_FALSE(TR::GetOnThreadPtr()); + EXPECT_FALSE(TR::WithOnThreadRefOr([&](auto) { return true; }, false)); + + char onStackChar; + + TR tr{"Test thread", &onStackChar}; + ++registrationCount; + + ASSERT_TRUE(TR::IsRegistered()); + + int ranTest = 0; + TRy::WithOffThreadRef(testThreadId, [&](TRy::OffThreadRef aOffThreadRef) { + if (registrationCount % 2000 == 0) { + PR_Sleep(PR_MillisecondsToInterval(1)); + } + ++ranTest; + }); + EXPECT_EQ(ranTest, 1); + + if (registrationCount % 1000 == 0) { + PR_Sleep(PR_MillisecondsToInterval(1)); + } + } + + otherThread.join(); + }); + + testThread.join(); + + // It's difficult to guess what these numbers should be, but they definitely + // should be non-zero. The main goal was to test that nothing goes wrong. 
+ EXPECT_GT(registrationCount, 0); + EXPECT_GT(otherThreadLoops, 0); + EXPECT_GT(otherThreadReads, 0); +} + +#ifdef MOZ_GECKO_PROFILER + +TEST(BaseProfiler, BlocksRingBuffer) +{ + constexpr uint32_t MBSize = 256; + uint8_t buffer[MBSize * 3]; + for (size_t i = 0; i < MBSize * 3; ++i) { + buffer[i] = uint8_t('A' + i); + } + BlocksRingBuffer rb(BlocksRingBuffer::ThreadSafety::WithMutex, + &buffer[MBSize], MakePowerOfTwo32()); + + { + nsCString cs("nsCString"_ns); + nsString s(u"nsString"_ns); + nsAutoCString acs("nsAutoCString"_ns); + nsAutoString as(u"nsAutoString"_ns); + nsAutoCStringN<8> acs8("nsAutoCStringN"_ns); + nsAutoStringN<8> as8(u"nsAutoStringN"_ns); + JS::UniqueChars jsuc = JS_smprintf("%s", "JS::UniqueChars"); + + rb.PutObjects(cs, s, acs, as, acs8, as8, jsuc); + } + + rb.ReadEach([](ProfileBufferEntryReader& aER) { + ASSERT_EQ(aER.ReadObject(), "nsCString"_ns); + ASSERT_EQ(aER.ReadObject(), u"nsString"_ns); + ASSERT_EQ(aER.ReadObject(), "nsAutoCString"_ns); + ASSERT_EQ(aER.ReadObject(), u"nsAutoString"_ns); + ASSERT_EQ(aER.ReadObject>(), "nsAutoCStringN"_ns); + ASSERT_EQ(aER.ReadObject>(), u"nsAutoStringN"_ns); + auto jsuc2 = aER.ReadObject(); + ASSERT_TRUE(!!jsuc2); + ASSERT_TRUE(strcmp(jsuc2.get(), "JS::UniqueChars") == 0); + }); + + // Everything around the sub-buffer should be unchanged. + for (size_t i = 0; i < MBSize; ++i) { + ASSERT_EQ(buffer[i], uint8_t('A' + i)); + } + for (size_t i = MBSize * 2; i < MBSize * 3; ++i) { + ASSERT_EQ(buffer[i], uint8_t('A' + i)); + } +} + +// Common JSON checks. + +// Check that the given JSON string include no JSON whitespace characters +// (excluding those in property names and strings). 
+void JSONWhitespaceCheck(const char* aOutput) { + ASSERT_NE(aOutput, nullptr); + + enum class State { Data, String, StringEscaped }; + State state = State::Data; + size_t length = 0; + size_t whitespaces = 0; + for (const char* p = aOutput; *p != '\0'; ++p) { + ++length; + const char c = *p; + + switch (state) { + case State::Data: + if (c == '\n' || c == '\r' || c == ' ' || c == '\t') { + ++whitespaces; + } else if (c == '"') { + state = State::String; + } + break; + + case State::String: + if (c == '"') { + state = State::Data; + } else if (c == '\\') { + state = State::StringEscaped; + } + break; + + case State::StringEscaped: + state = State::String; + break; + } + } + + EXPECT_EQ(whitespaces, 0u); + EXPECT_GT(length, 0u); +} + +// Does the GETTER return a non-null TYPE? (Non-critical) +# define EXPECT_HAS_JSON(GETTER, TYPE) \ + do { \ + if ((GETTER).isNull()) { \ + EXPECT_FALSE((GETTER).isNull()) \ + << #GETTER " doesn't exist or is null"; \ + } else if (!(GETTER).is##TYPE()) { \ + EXPECT_TRUE((GETTER).is##TYPE()) \ + << #GETTER " didn't return type " #TYPE; \ + } \ + } while (false) + +// Does the GETTER return a non-null TYPE? (Critical) +# define ASSERT_HAS_JSON(GETTER, TYPE) \ + do { \ + ASSERT_FALSE((GETTER).isNull()); \ + ASSERT_TRUE((GETTER).is##TYPE()); \ + } while (false) + +// Does the GETTER return a non-null TYPE? (Critical) +// If yes, store the reference to Json::Value into VARIABLE. +# define GET_JSON(VARIABLE, GETTER, TYPE) \ + ASSERT_HAS_JSON(GETTER, TYPE); \ + const Json::Value& VARIABLE = (GETTER) + +// Does the GETTER return a non-null TYPE? (Critical) +// If yes, store the value as `const TYPE` into VARIABLE. +# define GET_JSON_VALUE(VARIABLE, GETTER, TYPE) \ + ASSERT_HAS_JSON(GETTER, TYPE); \ + const auto VARIABLE = (GETTER).as##TYPE() + +// Non-const GET_JSON_VALUE. 
+# define GET_JSON_MUTABLE_VALUE(VARIABLE, GETTER, TYPE) \ + ASSERT_HAS_JSON(GETTER, TYPE); \ + auto VARIABLE = (GETTER).as##TYPE() + +// Checks that the GETTER's value is present, is of the expected TYPE, and has +// the expected VALUE. (Non-critical) +# define EXPECT_EQ_JSON(GETTER, TYPE, VALUE) \ + do { \ + if ((GETTER).isNull()) { \ + EXPECT_FALSE((GETTER).isNull()) \ + << #GETTER " doesn't exist or is null"; \ + } else if (!(GETTER).is##TYPE()) { \ + EXPECT_TRUE((GETTER).is##TYPE()) \ + << #GETTER " didn't return type " #TYPE; \ + } else { \ + EXPECT_EQ((GETTER).as##TYPE(), (VALUE)); \ + } \ + } while (false) + +// Checks that the GETTER's value is present, and is a valid index into the +// STRINGTABLE array, pointing at the expected STRING. +# define EXPECT_EQ_STRINGTABLE(GETTER, STRINGTABLE, STRING) \ + do { \ + if ((GETTER).isNull()) { \ + EXPECT_FALSE((GETTER).isNull()) \ + << #GETTER " doesn't exist or is null"; \ + } else if (!(GETTER).isUInt()) { \ + EXPECT_TRUE((GETTER).isUInt()) << #GETTER " didn't return an index"; \ + } else { \ + EXPECT_LT((GETTER).asUInt(), (STRINGTABLE).size()); \ + EXPECT_EQ_JSON((STRINGTABLE)[(GETTER).asUInt()], String, (STRING)); \ + } \ + } while (false) + +# define EXPECT_JSON_ARRAY_CONTAINS(GETTER, TYPE, VALUE) \ + do { \ + if ((GETTER).isNull()) { \ + EXPECT_FALSE((GETTER).isNull()) \ + << #GETTER " doesn't exist or is null"; \ + } else if (!(GETTER).isArray()) { \ + EXPECT_TRUE((GETTER).is##TYPE()) << #GETTER " is not an array"; \ + } else if (const Json::ArrayIndex size = (GETTER).size(); size == 0u) { \ + EXPECT_NE(size, 0u) << #GETTER " is an empty array"; \ + } else { \ + bool found = false; \ + for (Json::ArrayIndex i = 0; i < size; ++i) { \ + if (!(GETTER)[i].is##TYPE()) { \ + EXPECT_TRUE((GETTER)[i].is##TYPE()) \ + << #GETTER "[" << i << "] is not " #TYPE; \ + break; \ + } \ + if ((GETTER)[i].as##TYPE() == (VALUE)) { \ + found = true; \ + break; \ + } \ + } \ + EXPECT_TRUE(found) << #GETTER " doesn't contain " 
#VALUE; \ + } \ + } while (false) + +# define EXPECT_JSON_ARRAY_EXCLUDES(GETTER, TYPE, VALUE) \ + do { \ + if ((GETTER).isNull()) { \ + EXPECT_FALSE((GETTER).isNull()) \ + << #GETTER " doesn't exist or is null"; \ + } else if (!(GETTER).isArray()) { \ + EXPECT_TRUE((GETTER).is##TYPE()) << #GETTER " is not an array"; \ + } else { \ + const Json::ArrayIndex size = (GETTER).size(); \ + for (Json::ArrayIndex i = 0; i < size; ++i) { \ + if (!(GETTER)[i].is##TYPE()) { \ + EXPECT_TRUE((GETTER)[i].is##TYPE()) \ + << #GETTER "[" << i << "] is not " #TYPE; \ + break; \ + } \ + if ((GETTER)[i].as##TYPE() == (VALUE)) { \ + EXPECT_TRUE((GETTER)[i].as##TYPE() != (VALUE)) \ + << #GETTER " contains " #VALUE; \ + break; \ + } \ + } \ + } \ + } while (false) + +// Check that the given process root contains all the expected properties. +static void JSONRootCheck(const Json::Value& aRoot, + bool aWithMainThread = true) { + ASSERT_TRUE(aRoot.isObject()); + + EXPECT_HAS_JSON(aRoot["libs"], Array); + + GET_JSON(meta, aRoot["meta"], Object); + EXPECT_HAS_JSON(meta["version"], UInt); + EXPECT_HAS_JSON(meta["startTime"], Double); + EXPECT_HAS_JSON(meta["profilingStartTime"], Double); + EXPECT_HAS_JSON(meta["contentEarliestTime"], Double); + EXPECT_HAS_JSON(meta["profilingEndTime"], Double); + + EXPECT_HAS_JSON(aRoot["pages"], Array); + + EXPECT_HAS_JSON(aRoot["profilerOverhead"], Object); + + // "counters" is only present if there is any data to report. + // Test that expect "counters" should test for its presence first. + if (aRoot.isMember("counters")) { + // We have "counters", test their overall validity. 
+ GET_JSON(counters, aRoot["counters"], Array); + for (const Json::Value& counter : counters) { + ASSERT_TRUE(counter.isObject()); + EXPECT_HAS_JSON(counter["name"], String); + EXPECT_HAS_JSON(counter["category"], String); + EXPECT_HAS_JSON(counter["description"], String); + GET_JSON(sampleGroups, counter["sample_groups"], Array); + for (const Json::Value& sampleGroup : sampleGroups) { + ASSERT_TRUE(sampleGroup.isObject()); + EXPECT_HAS_JSON(sampleGroup["id"], UInt); + + GET_JSON(samples, sampleGroup["samples"], Object); + GET_JSON(samplesSchema, samples["schema"], Object); + EXPECT_GE(samplesSchema.size(), 3u); + GET_JSON_VALUE(samplesTime, samplesSchema["time"], UInt); + GET_JSON_VALUE(samplesNumber, samplesSchema["number"], UInt); + GET_JSON_VALUE(samplesCount, samplesSchema["count"], UInt); + GET_JSON(samplesData, samples["data"], Array); + double previousTime = 0.0; + for (const Json::Value& sample : samplesData) { + ASSERT_TRUE(sample.isArray()); + GET_JSON_VALUE(time, sample[samplesTime], Double); + EXPECT_GE(time, previousTime); + previousTime = time; + if (sample.isValidIndex(samplesNumber)) { + EXPECT_HAS_JSON(sample[samplesNumber], UInt64); + } + if (sample.isValidIndex(samplesCount)) { + EXPECT_HAS_JSON(sample[samplesCount], Int64); + } + } + } + } + } + + GET_JSON(threads, aRoot["threads"], Array); + const Json::ArrayIndex threadCount = threads.size(); + for (Json::ArrayIndex i = 0; i < threadCount; ++i) { + GET_JSON(thread, threads[i], Object); + EXPECT_HAS_JSON(thread["processType"], String); + EXPECT_HAS_JSON(thread["name"], String); + EXPECT_HAS_JSON(thread["registerTime"], Double); + GET_JSON(samples, thread["samples"], Object); + EXPECT_HAS_JSON(thread["markers"], Object); + EXPECT_HAS_JSON(thread["pid"], Int64); + EXPECT_HAS_JSON(thread["tid"], Int64); + GET_JSON(stackTable, thread["stackTable"], Object); + GET_JSON(frameTable, thread["frameTable"], Object); + GET_JSON(stringTable, thread["stringTable"], Array); + + GET_JSON(stackTableSchema, 
stackTable["schema"], Object); + EXPECT_GE(stackTableSchema.size(), 2u); + GET_JSON_VALUE(stackTablePrefix, stackTableSchema["prefix"], UInt); + GET_JSON_VALUE(stackTableFrame, stackTableSchema["frame"], UInt); + GET_JSON(stackTableData, stackTable["data"], Array); + + GET_JSON(frameTableSchema, frameTable["schema"], Object); + EXPECT_GE(frameTableSchema.size(), 1u); + GET_JSON_VALUE(frameTableLocation, frameTableSchema["location"], UInt); + GET_JSON(frameTableData, frameTable["data"], Array); + + GET_JSON(samplesSchema, samples["schema"], Object); + GET_JSON_VALUE(sampleStackIndex, samplesSchema["stack"], UInt); + GET_JSON(samplesData, samples["data"], Array); + for (const Json::Value& sample : samplesData) { + ASSERT_TRUE(sample.isArray()); + if (sample.isValidIndex(sampleStackIndex)) { + if (!sample[sampleStackIndex].isNull()) { + GET_JSON_MUTABLE_VALUE(stack, sample[sampleStackIndex], UInt); + EXPECT_TRUE(stackTableData.isValidIndex(stack)); + for (;;) { + // `stack` (from the sample, or from the callee frame's "prefix" in + // the previous loop) points into the stackTable. + GET_JSON(stackTableEntry, stackTableData[stack], Array); + GET_JSON_VALUE(frame, stackTableEntry[stackTableFrame], UInt); + + // The stackTable entry's "frame" points into the frameTable. + EXPECT_TRUE(frameTableData.isValidIndex(frame)); + GET_JSON(frameTableEntry, frameTableData[frame], Array); + GET_JSON_VALUE(location, frameTableEntry[frameTableLocation], UInt); + + // The frameTable entry's "location" points at a string. + EXPECT_TRUE(stringTable.isValidIndex(location)); + + // The stackTable entry's "prefix" is null for the root frame. + if (stackTableEntry[stackTablePrefix].isNull()) { + break; + } + // Otherwise it recursively points at the caller in the stackTable. 
+ GET_JSON_VALUE(prefix, stackTableEntry[stackTablePrefix], UInt); + EXPECT_TRUE(stackTableData.isValidIndex(prefix)); + stack = prefix; + } + } + } + } + } + + if (aWithMainThread) { + ASSERT_GT(threadCount, 0u); + GET_JSON(thread0, threads[0], Object); + EXPECT_EQ_JSON(thread0["name"], String, "GeckoMain"); + } + + EXPECT_HAS_JSON(aRoot["pausedRanges"], Array); + + const Json::Value& processes = aRoot["processes"]; + if (!processes.isNull()) { + ASSERT_TRUE(processes.isArray()); + const Json::ArrayIndex processCount = processes.size(); + for (Json::ArrayIndex i = 0; i < processCount; ++i) { + GET_JSON(process, processes[i], Object); + JSONRootCheck(process, aWithMainThread); + } + } + + GET_JSON(profilingLog, aRoot["profilingLog"], Object); + EXPECT_EQ(profilingLog.size(), 1u); + for (auto it = profilingLog.begin(); it != profilingLog.end(); ++it) { + // The key should be a pid. + const auto key = it.name(); + for (const auto letter : key) { + EXPECT_GE(letter, '0'); + EXPECT_LE(letter, '9'); + } + // And the value should be an object. + GET_JSON(logForPid, profilingLog[key], Object); + // Its content is not defined, but we expect at least these: + EXPECT_HAS_JSON(logForPid["profilingLogBegin_TSms"], Double); + EXPECT_HAS_JSON(logForPid["profilingLogEnd_TSms"], Double); + } +} + +// Check that various expected top properties are in the JSON, and then call the +// provided `aJSONCheckFunction` with the JSON root object. +template +void JSONOutputCheck(const char* aOutput, + JSONCheckFunction&& aJSONCheckFunction) { + ASSERT_NE(aOutput, nullptr); + + JSONWhitespaceCheck(aOutput); + + // Extract JSON. + Json::Value parsedRoot; + Json::CharReaderBuilder builder; + const std::unique_ptr reader(builder.newCharReader()); + ASSERT_TRUE( + reader->parse(aOutput, strchr(aOutput, '\0'), &parsedRoot, nullptr)); + + JSONRootCheck(parsedRoot); + + std::forward(aJSONCheckFunction)(parsedRoot); +} + +// Returns `static_cast(-1)` if callback could not be installed. 
+static SamplingState WaitForSamplingState() { + Atomic samplingState{-1}; + + if (!profiler_callback_after_sampling([&](SamplingState aSamplingState) { + samplingState = static_cast(aSamplingState); + })) { + return static_cast(-1); + } + + while (samplingState == -1) { + } + + return static_cast(static_cast(samplingState)); +} + +typedef Vector StrVec; + +static void InactiveFeaturesAndParamsCheck() { + int entries; + Maybe duration; + double interval; + uint32_t features; + StrVec filters; + uint64_t activeTabID; + + ASSERT_TRUE(!profiler_is_active()); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::NativeAllocations)); + + profiler_get_start_params(&entries, &duration, &interval, &features, &filters, + &activeTabID); + + ASSERT_TRUE(entries == 0); + ASSERT_TRUE(duration == Nothing()); + ASSERT_TRUE(interval == 0); + ASSERT_TRUE(features == 0); + ASSERT_TRUE(filters.empty()); + ASSERT_TRUE(activeTabID == 0); +} + +static void ActiveParamsCheck(int aEntries, double aInterval, + uint32_t aFeatures, const char** aFilters, + size_t aFiltersLen, uint64_t aActiveTabID, + const Maybe& aDuration = Nothing()) { + int entries; + Maybe duration; + double interval; + uint32_t features; + StrVec filters; + uint64_t activeTabID; + + profiler_get_start_params(&entries, &duration, &interval, &features, &filters, + &activeTabID); + + ASSERT_TRUE(entries == aEntries); + ASSERT_TRUE(duration == aDuration); + ASSERT_TRUE(interval == aInterval); + ASSERT_TRUE(features == aFeatures); + ASSERT_TRUE(filters.length() == aFiltersLen); + ASSERT_TRUE(activeTabID == aActiveTabID); + for (size_t i = 0; i < aFiltersLen; i++) { + ASSERT_TRUE(strcmp(filters[i], aFilters[i]) == 0); + } +} + +TEST(GeckoProfiler, FeaturesAndParams) +{ + InactiveFeaturesAndParamsCheck(); + + // Try a couple of features and filters. 
+ { + uint32_t features = ProfilerFeature::JS; + const char* filters[] = {"GeckoMain", "Compositor"}; + +# define PROFILER_DEFAULT_DURATION 20 /* seconds, for tests only */ + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 100, + Some(PROFILER_DEFAULT_DURATION)); + + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::IPCMessages)); + + ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(), + PROFILER_DEFAULT_INTERVAL, features, filters, + MOZ_ARRAY_LENGTH(filters), 100, + Some(PROFILER_DEFAULT_DURATION)); + + profiler_stop(); + + InactiveFeaturesAndParamsCheck(); + } + + // Try some different features and filters. + { + uint32_t features = + ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages; + const char* filters[] = {"GeckoMain", "Foo", "Bar"}; + + // Testing with some arbitrary buffer size (as could be provided by + // external code), which we convert to the appropriate power of 2. 
+ profiler_start(PowerOfTwo32(999999), 3, features, filters, + MOZ_ARRAY_LENGTH(filters), 123, Some(25.0)); + + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE(profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(profiler_feature_active(ProfilerFeature::IPCMessages)); + + ActiveParamsCheck(int(PowerOfTwo32(999999).Value()), 3, features, filters, + MOZ_ARRAY_LENGTH(filters), 123, Some(25.0)); + + profiler_stop(); + + InactiveFeaturesAndParamsCheck(); + } + + // Try with no duration + { + uint32_t features = + ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages; + const char* filters[] = {"GeckoMain", "Foo", "Bar"}; + + profiler_start(PowerOfTwo32(999999), 3, features, filters, + MOZ_ARRAY_LENGTH(filters), 0, Nothing()); + + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE(profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(profiler_feature_active(ProfilerFeature::IPCMessages)); + + ActiveParamsCheck(int(PowerOfTwo32(999999).Value()), 3, features, filters, + MOZ_ARRAY_LENGTH(filters), 0, Nothing()); + + profiler_stop(); + + InactiveFeaturesAndParamsCheck(); + } + + // Try all supported features, and filters that match all threads. + { + uint32_t availableFeatures = profiler_get_available_features(); + const char* filters[] = {""}; + + profiler_start(PowerOfTwo32(88888), 10, availableFeatures, filters, + MOZ_ARRAY_LENGTH(filters), 0, Some(15.0)); + + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE(profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(profiler_feature_active(ProfilerFeature::IPCMessages)); + + ActiveParamsCheck(PowerOfTwo32(88888).Value(), 10, availableFeatures, + filters, MOZ_ARRAY_LENGTH(filters), 0, Some(15.0)); + + // Don't call profiler_stop() here. + } + + // Try no features, and filters that match no threads. 
+ { + uint32_t features = 0; + const char* filters[] = {"NoThreadWillMatchThis"}; + + // Second profiler_start() call in a row without an intervening + // profiler_stop(); this will do an implicit profiler_stop() and restart. + profiler_start(PowerOfTwo32(0), 0, features, filters, + MOZ_ARRAY_LENGTH(filters), 0, Some(0.0)); + + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::IPCMessages)); + + // Entries and intervals go to defaults if 0 is specified. + ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(), + PROFILER_DEFAULT_INTERVAL, features, filters, + MOZ_ARRAY_LENGTH(filters), 0, Nothing()); + + profiler_stop(); + + InactiveFeaturesAndParamsCheck(); + + // These calls are no-ops. + profiler_stop(); + profiler_stop(); + + InactiveFeaturesAndParamsCheck(); + } +} + +TEST(GeckoProfiler, EnsureStarted) +{ + InactiveFeaturesAndParamsCheck(); + + uint32_t features = ProfilerFeature::JS; + const char* filters[] = {"GeckoMain", "Compositor"}; + { + // Inactive -> Active + profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0, + Some(PROFILER_DEFAULT_DURATION)); + + ActiveParamsCheck( + PROFILER_DEFAULT_ENTRIES.Value(), PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0, Some(PROFILER_DEFAULT_DURATION)); + } + + { + // Active -> Active with same settings + + Maybe info0 = profiler_get_buffer_info(); + ASSERT_TRUE(info0->mRangeEnd > 0); + + // First, write some samples into the buffer. + PR_Sleep(PR_MillisecondsToInterval(500)); + + Maybe info1 = profiler_get_buffer_info(); + ASSERT_TRUE(info1->mRangeEnd > info0->mRangeEnd); + + // Call profiler_ensure_started with the same settings as before. + // This operation must not clear our buffer! 
+ profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0, + Some(PROFILER_DEFAULT_DURATION)); + + ActiveParamsCheck( + PROFILER_DEFAULT_ENTRIES.Value(), PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0, Some(PROFILER_DEFAULT_DURATION)); + + // Check that our position in the buffer stayed the same or advanced, but + // not by much, and the range-start after profiler_ensure_started shouldn't + // have passed the range-end before. + Maybe info2 = profiler_get_buffer_info(); + ASSERT_TRUE(info2->mRangeEnd >= info1->mRangeEnd); + ASSERT_TRUE(info2->mRangeEnd - info1->mRangeEnd < + info1->mRangeEnd - info0->mRangeEnd); + ASSERT_TRUE(info2->mRangeStart < info1->mRangeEnd); + } + + { + // Active -> Active with *different* settings + + Maybe info1 = profiler_get_buffer_info(); + + // Call profiler_ensure_started with a different feature set than the one + // it's currently running with. This is supposed to stop and restart the + // profiler, thereby discarding the buffer contents. + uint32_t differentFeatures = features | ProfilerFeature::CPUUtilization; + profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + differentFeatures, filters, + MOZ_ARRAY_LENGTH(filters), 0); + + ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(), + PROFILER_DEFAULT_INTERVAL, differentFeatures, filters, + MOZ_ARRAY_LENGTH(filters), 0); + + // Check that the buffer was cleared, so its range-start should be at/after + // its range-end before. + Maybe info2 = profiler_get_buffer_info(); + ASSERT_TRUE(info2->mRangeStart >= info1->mRangeEnd); + } + + { + // Active -> Inactive + + profiler_stop(); + + InactiveFeaturesAndParamsCheck(); + } +} + +TEST(GeckoProfiler, MultiRegistration) +{ + // This whole test only checks that function calls don't crash, they don't + // actually verify that threads get profiled or not.
+ + { + std::thread thread([]() { + char top; + profiler_register_thread("thread, no unreg", &top); + }); + thread.join(); + } + + { + std::thread thread([]() { profiler_unregister_thread(); }); + thread.join(); + } + + { + std::thread thread([]() { + char top; + profiler_register_thread("thread 1st", &top); + profiler_unregister_thread(); + profiler_register_thread("thread 2nd", &top); + profiler_unregister_thread(); + }); + thread.join(); + } + + { + std::thread thread([]() { + char top; + profiler_register_thread("thread once", &top); + profiler_register_thread("thread again", &top); + profiler_unregister_thread(); + }); + thread.join(); + } + + { + std::thread thread([]() { + char top; + profiler_register_thread("thread to unreg twice", &top); + profiler_unregister_thread(); + profiler_unregister_thread(); + }); + thread.join(); + } +} + +TEST(GeckoProfiler, DifferentThreads) +{ + InactiveFeaturesAndParamsCheck(); + + nsCOMPtr thread; + nsresult rv = NS_NewNamedThread("GeckoProfGTest", getter_AddRefs(thread)); + ASSERT_NS_SUCCEEDED(rv); + + // Control the profiler on a background thread and verify flags on the + // main thread. 
+ { + uint32_t features = ProfilerFeature::JS; + const char* filters[] = {"GeckoMain", "Compositor"}; + + NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread, + NS_NewRunnableFunction( + "GeckoProfiler_DifferentThreads_Test::TestBody", [&]() { + profiler_start(PROFILER_DEFAULT_ENTRIES, + PROFILER_DEFAULT_INTERVAL, features, filters, + MOZ_ARRAY_LENGTH(filters), 0); + })); + + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::IPCMessages)); + + ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(), + PROFILER_DEFAULT_INTERVAL, features, filters, + MOZ_ARRAY_LENGTH(filters), 0); + + NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread, + NS_NewRunnableFunction("GeckoProfiler_DifferentThreads_Test::TestBody", + [&]() { profiler_stop(); })); + + InactiveFeaturesAndParamsCheck(); + } + + // Control the profiler on the main thread and verify flags on a + // background thread. 
+ { + uint32_t features = ProfilerFeature::JS; + const char* filters[] = {"GeckoMain", "Compositor"}; + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0); + + NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread, + NS_NewRunnableFunction( + "GeckoProfiler_DifferentThreads_Test::TestBody", [&]() { + ASSERT_TRUE(profiler_is_active()); + ASSERT_TRUE( + !profiler_feature_active(ProfilerFeature::MainThreadIO)); + ASSERT_TRUE( + !profiler_feature_active(ProfilerFeature::IPCMessages)); + + ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(), + PROFILER_DEFAULT_INTERVAL, features, filters, + MOZ_ARRAY_LENGTH(filters), 0); + })); + + profiler_stop(); + + NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread, + NS_NewRunnableFunction("GeckoProfiler_DifferentThreads_Test::TestBody", + [&]() { InactiveFeaturesAndParamsCheck(); })); + } + + thread->Shutdown(); +} + +TEST(GeckoProfiler, GetBacktrace) +{ + ASSERT_TRUE(!profiler_get_backtrace()); + + { + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0); + + // These will be destroyed while the profiler is active. + static const int N = 100; + { + UniqueProfilerBacktrace u[N]; + for (int i = 0; i < N; i++) { + u[i] = profiler_get_backtrace(); + ASSERT_TRUE(u[i]); + } + } + + // These will be destroyed after the profiler stops. 
+ UniqueProfilerBacktrace u[N]; + for (int i = 0; i < N; i++) { + u[i] = profiler_get_backtrace(); + ASSERT_TRUE(u[i]); + } + + profiler_stop(); + } + + ASSERT_TRUE(!profiler_get_backtrace()); +} + +TEST(GeckoProfiler, Pause) +{ + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test must run on the main thread"; + + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain", "Profiled GeckoProfiler.Pause"}; + + ASSERT_TRUE(!profiler_is_paused()); + for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_current_thread_id(), + features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + + std::thread{[&]() { + { + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Ignored GeckoProfiler.Pause - before start"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Profiled GeckoProfiler.Pause - before start"); + for 
(ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + }}.join(); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + ASSERT_TRUE(!profiler_is_paused()); + for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_current_thread_id(), + features)); + } + + std::thread{[&]() { + { + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Ignored GeckoProfiler.Pause - after start"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Profiled GeckoProfiler.Pause - after start"); + for (ThreadProfilingFeatures features : + 
scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + } + }}.join(); + + // Check that we are writing samples while not paused. + Maybe info1 = profiler_get_buffer_info(); + PR_Sleep(PR_MillisecondsToInterval(500)); + Maybe info2 = profiler_get_buffer_info(); + ASSERT_TRUE(info1->mRangeEnd != info2->mRangeEnd); + + // Check that we are writing markers while not paused. + ASSERT_TRUE(profiler_thread_is_being_profiled_for_markers()); + ASSERT_TRUE( + profiler_thread_is_being_profiled_for_markers(ProfilerThreadId{})); + ASSERT_TRUE(profiler_thread_is_being_profiled_for_markers( + profiler_current_thread_id())); + ASSERT_TRUE( + profiler_thread_is_being_profiled_for_markers(profiler_main_thread_id())); + info1 = profiler_get_buffer_info(); + PROFILER_MARKER_UNTYPED("Not paused", OTHER, {}); + info2 = profiler_get_buffer_info(); + ASSERT_TRUE(info1->mRangeEnd != info2->mRangeEnd); + + profiler_pause(); + + ASSERT_TRUE(profiler_is_paused()); + for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_current_thread_id(), + features)); + } + ASSERT_TRUE(!profiler_thread_is_being_profiled_for_markers()); + ASSERT_TRUE( + !profiler_thread_is_being_profiled_for_markers(ProfilerThreadId{})); + ASSERT_TRUE(!profiler_thread_is_being_profiled_for_markers( + profiler_current_thread_id())); + ASSERT_TRUE(!profiler_thread_is_being_profiled_for_markers( + profiler_main_thread_id())); + + std::thread{[&]() { + { + for (ThreadProfilingFeatures features : 
+ scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Ignored GeckoProfiler.Pause - after pause"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Profiled GeckoProfiler.Pause - after pause"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + }}.join(); + + // Check that we are not writing samples while paused. + info1 = profiler_get_buffer_info(); + PR_Sleep(PR_MillisecondsToInterval(500)); + info2 = profiler_get_buffer_info(); + ASSERT_TRUE(info1->mRangeEnd == info2->mRangeEnd); + + // Check that we are not writing markers while paused.
+ info1 = profiler_get_buffer_info(); + PROFILER_MARKER_UNTYPED("Paused", OTHER, {}); + info2 = profiler_get_buffer_info(); + ASSERT_TRUE(info1->mRangeEnd == info2->mRangeEnd); + PROFILER_MARKER_UNTYPED("Paused v2", OTHER, {}); + Maybe info3 = profiler_get_buffer_info(); + ASSERT_TRUE(info2->mRangeEnd == info3->mRangeEnd); + + profiler_resume(); + + ASSERT_TRUE(!profiler_is_paused()); + for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_current_thread_id(), + features)); + } + + std::thread{[&]() { + { + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Ignored GeckoProfiler.Pause - after resume"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Profiled GeckoProfiler.Pause - after resume"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + 
ASSERT_TRUE(profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(), + features)); + } + } + }}.join(); + + profiler_stop(); + + ASSERT_TRUE(!profiler_is_paused()); + for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_current_thread_id(), + features)); + } + + std::thread{[&]() { + { + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD("Ignored GeckoProfiler.Pause - after stop"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_main_thread_id(), features)); + } + } + { + AUTO_PROFILER_REGISTER_THREAD( + "Profiled GeckoProfiler.Pause - after stop"); + for (ThreadProfilingFeatures features : + scEachAndAnyThreadProfilingFeatures) { + ASSERT_TRUE(!profiler_thread_is_being_profiled(features)); + ASSERT_TRUE( + !profiler_thread_is_being_profiled(ProfilerThreadId{}, features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + profiler_current_thread_id(), features)); + ASSERT_TRUE(!profiler_thread_is_being_profiled( + 
profiler_main_thread_id(), features)); + } + } + }}.join(); +} + +TEST(GeckoProfiler, Markers) +{ + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + PROFILER_MARKER("tracing event", OTHER, {}, Tracing, "A"); + PROFILER_MARKER("tracing start", OTHER, MarkerTiming::IntervalStart(), + Tracing, "A"); + PROFILER_MARKER("tracing end", OTHER, MarkerTiming::IntervalEnd(), Tracing, + "A"); + + auto bt = profiler_capture_backtrace(); + PROFILER_MARKER("tracing event with stack", OTHER, + MarkerStack::TakeBacktrace(std::move(bt)), Tracing, "B"); + + { AUTO_PROFILER_TRACING_MARKER("C", "auto tracing", OTHER); } + + PROFILER_MARKER_UNTYPED("M1", OTHER, {}); + PROFILER_MARKER_UNTYPED("M3", OTHER, {}); + + // Create three strings: two that are the maximum allowed length, and one that + // is one char longer. + static const size_t kMax = ProfileBuffer::kMaxFrameKeyLength; + UniquePtr okstr1 = MakeUnique(kMax); + UniquePtr okstr2 = MakeUnique(kMax); + UniquePtr longstr = MakeUnique(kMax + 1); + UniquePtr longstrCut = MakeUnique(kMax + 1); + for (size_t i = 0; i < kMax; i++) { + okstr1[i] = 'a'; + okstr2[i] = 'b'; + longstr[i] = 'c'; + longstrCut[i] = 'c'; + } + okstr1[kMax - 1] = '\0'; + okstr2[kMax - 1] = '\0'; + longstr[kMax] = '\0'; + longstrCut[kMax] = '\0'; + // Should be output as-is. + AUTO_PROFILER_LABEL_DYNAMIC_CSTR("", LAYOUT, ""); + AUTO_PROFILER_LABEL_DYNAMIC_CSTR("", LAYOUT, okstr1.get()); + // Should be output as label + space + okstr2. + AUTO_PROFILER_LABEL_DYNAMIC_CSTR("okstr2", LAYOUT, okstr2.get()); + // Should be output with kMax length, ending with "...\0". 
+ AUTO_PROFILER_LABEL_DYNAMIC_CSTR("", LAYOUT, longstr.get()); + ASSERT_EQ(longstrCut[kMax - 4], 'c'); + longstrCut[kMax - 4] = '.'; + ASSERT_EQ(longstrCut[kMax - 3], 'c'); + longstrCut[kMax - 3] = '.'; + ASSERT_EQ(longstrCut[kMax - 2], 'c'); + longstrCut[kMax - 2] = '.'; + ASSERT_EQ(longstrCut[kMax - 1], 'c'); + longstrCut[kMax - 1] = '\0'; + + // Test basic markers 2.0. + EXPECT_TRUE( + profiler_add_marker("default-templated markers 2.0 with empty options", + geckoprofiler::category::OTHER, {})); + + PROFILER_MARKER_UNTYPED( + "default-templated markers 2.0 with option", OTHER, + MarkerStack::TakeBacktrace(profiler_capture_backtrace())); + + PROFILER_MARKER("explicitly-default-templated markers 2.0 with empty options", + OTHER, {}, NoPayload); + + EXPECT_TRUE(profiler_add_marker( + "explicitly-default-templated markers 2.0 with option", + geckoprofiler::category::OTHER, {}, + ::geckoprofiler::markers::NoPayload{})); + + // Used in markers below. + TimeStamp ts1 = TimeStamp::Now(); + + // Sleep briefly to ensure a sample is taken and the pending markers are + // processed. + PR_Sleep(PR_MillisecondsToInterval(500)); + + // Used in markers below. + TimeStamp ts2 = TimeStamp::Now(); + // ts1 and ts2 should be different thanks to the sleep. + EXPECT_NE(ts1, ts2); + + // Test most marker payloads. + + // Keep this one first! (It's used to record `ts1` and `ts2`, to compare + // to serialized numbers in other markers.) + EXPECT_TRUE(profiler_add_marker("FirstMarker", geckoprofiler::category::OTHER, + MarkerTiming::Interval(ts1, ts2), + geckoprofiler::markers::TextMarker{}, + "First Marker")); + + // User-defined marker type with different properties, and fake schema. 
+ struct GtestMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("markers-gtest"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int aInt, + double aDouble, const mozilla::ProfilerString8View& aText, + const mozilla::ProfilerString8View& aUniqueText, + const mozilla::TimeStamp& aTime) { + aWriter.NullProperty("null"); + aWriter.BoolProperty("bool-false", false); + aWriter.BoolProperty("bool-true", true); + aWriter.IntProperty("int", aInt); + aWriter.DoubleProperty("double", aDouble); + aWriter.StringProperty("text", aText); + aWriter.UniqueStringProperty("unique text", aUniqueText); + aWriter.UniqueStringProperty("unique text again", aUniqueText); + aWriter.TimeProperty("time", aTime); + } + static mozilla::MarkerSchema MarkerTypeDisplay() { + // Note: This is a test function that is not intended to output a schema + // that correctly matches the StreamJSONMarkerData data above! Instead we + // only test that it outputs the expected JSON at the end. + using MS = mozilla::MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable, + MS::Location::TimelineOverview, MS::Location::TimelineMemory, + MS::Location::TimelineIPC, MS::Location::TimelineFileIO, + MS::Location::StackChart}; + // All label functions. + schema.SetChartLabel("chart label"); + schema.SetTooltipLabel("tooltip label"); + schema.SetTableLabel("table label"); + // All data functions, all formats, all "searchable" values.
+ schema.AddKeyFormat("key with url", MS::Format::Url); + schema.AddKeyLabelFormat("key with label filePath", "label filePath", + MS::Format::FilePath); + schema.AddKeyFormatSearchable("key with string not-searchable", + MS::Format::String, + MS::Searchable::NotSearchable); + schema.AddKeyLabelFormatSearchable("key with label duration searchable", + "label duration", MS::Format::Duration, + MS::Searchable::Searchable); + schema.AddKeyFormat("key with time", MS::Format::Time); + schema.AddKeyFormat("key with seconds", MS::Format::Seconds); + schema.AddKeyFormat("key with milliseconds", MS::Format::Milliseconds); + schema.AddKeyFormat("key with microseconds", MS::Format::Microseconds); + schema.AddKeyFormat("key with nanoseconds", MS::Format::Nanoseconds); + schema.AddKeyFormat("key with bytes", MS::Format::Bytes); + schema.AddKeyFormat("key with percentage", MS::Format::Percentage); + schema.AddKeyFormat("key with integer", MS::Format::Integer); + schema.AddKeyFormat("key with decimal", MS::Format::Decimal); + schema.AddStaticLabelValue("static label", "static value"); + return schema; + } + }; + EXPECT_TRUE( + profiler_add_marker("Gtest custom marker", geckoprofiler::category::OTHER, + MarkerTiming::Interval(ts1, ts2), GtestMarker{}, 42, + 43.0, "gtest text", "gtest unique text", ts1)); + + // User-defined marker type with no data, special frontend schema. + struct GtestSpecialMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("markers-gtest-special"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter) {} + static mozilla::MarkerSchema MarkerTypeDisplay() { + return mozilla::MarkerSchema::SpecialFrontendLocation{}; + } + }; + EXPECT_TRUE(profiler_add_marker("Gtest special marker", + geckoprofiler::category::OTHER, {}, + GtestSpecialMarker{})); + + // User-defined marker type that is never used, so it shouldn't appear in the + // output. 
+ struct GtestUnusedMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("markers-gtest-unused"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter) {} + static mozilla::MarkerSchema MarkerTypeDisplay() { + return mozilla::MarkerSchema::SpecialFrontendLocation{}; + } + }; + + // Make sure the compiler doesn't complain about this unused struct. + mozilla::Unused << GtestUnusedMarker{}; + + // Other markers in alphabetical order of payload class names. + + nsCOMPtr uri; + ASSERT_TRUE( + NS_SUCCEEDED(NS_NewURI(getter_AddRefs(uri), "http://mozilla.org/"_ns))); + // The marker name will be "Load : ". + profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 1, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_START, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheHit, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ false + /* const mozilla::net::TimingStruct* aTimings = nullptr */ + /* mozilla::UniquePtr aSource = + nullptr */ + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + /* nsIURI* aRedirectURI = nullptr */ + /* uint64_t aRedirectChannelId = 0 */ + ); + + profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 2, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_STOP, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheUnresolved, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ false, + /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr, + /* 
mozilla::UniquePtr aSource = + nullptr */ + nullptr, + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + Some(nsDependentCString("text/html")), + /* nsIURI* aRedirectURI = nullptr */ nullptr, + /* uint64_t aRedirectChannelId = 0 */ 0); + + nsCOMPtr redirectURI; + ASSERT_TRUE(NS_SUCCEEDED( + NS_NewURI(getter_AddRefs(redirectURI), "http://example.com/"_ns))); + profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 3, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheUnresolved, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ false, + /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr, + /* mozilla::UniquePtr aSource = + nullptr */ + nullptr, + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + mozilla::Nothing(), + /* nsIURI* aRedirectURI = nullptr */ redirectURI, + /* uint32_t aRedirectFlags = 0 */ + nsIChannelEventSink::REDIRECT_TEMPORARY, + /* uint64_t aRedirectChannelId = 0 */ 103); + + profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 4, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheUnresolved, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ false, + /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr, + /* mozilla::UniquePtr aSource = + nullptr */ + nullptr, + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + mozilla::Nothing(), + /* nsIURI* 
aRedirectURI = nullptr */ redirectURI, + /* uint32_t aRedirectFlags = 0 */ + nsIChannelEventSink::REDIRECT_PERMANENT, + /* uint64_t aRedirectChannelId = 0 */ 104); + + profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 5, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheUnresolved, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ false, + /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr, + /* mozilla::UniquePtr aSource = + nullptr */ + nullptr, + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + mozilla::Nothing(), + /* nsIURI* aRedirectURI = nullptr */ redirectURI, + /* uint32_t aRedirectFlags = 0 */ nsIChannelEventSink::REDIRECT_INTERNAL, + /* uint64_t aRedirectChannelId = 0 */ 105); + + profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 6, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheUnresolved, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ false, + /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr, + /* mozilla::UniquePtr aSource = + nullptr */ + nullptr, + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + mozilla::Nothing(), + /* nsIURI* aRedirectURI = nullptr */ redirectURI, + /* uint32_t aRedirectFlags = 0 */ nsIChannelEventSink::REDIRECT_INTERNAL | + nsIChannelEventSink::REDIRECT_STS_UPGRADE, + /* uint64_t aRedirectChannelId = 0 */ 106); + 
profiler_add_network_marker( + /* nsIURI* aURI */ uri, + /* const nsACString& aRequestMethod */ "GET"_ns, + /* int32_t aPriority */ 34, + /* uint64_t aChannelId */ 7, + /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_START, + /* mozilla::TimeStamp aStart */ ts1, + /* mozilla::TimeStamp aEnd */ ts2, + /* int64_t aCount */ 56, + /* mozilla::net::CacheDisposition aCacheDisposition */ + net::kCacheUnresolved, + /* uint64_t aInnerWindowID */ 78, + /* bool aIsPrivateBrowsing */ true + /* const mozilla::net::TimingStruct* aTimings = nullptr */ + /* mozilla::UniquePtr aSource = + nullptr */ + /* const mozilla::Maybe& aContentType = + mozilla::Nothing() */ + /* nsIURI* aRedirectURI = nullptr */ + /* uint64_t aRedirectChannelId = 0 */ + ); + + EXPECT_TRUE(profiler_add_marker( + "Text in main thread with stack", geckoprofiler::category::OTHER, + {MarkerStack::Capture(), MarkerTiming::Interval(ts1, ts2)}, + geckoprofiler::markers::TextMarker{}, "")); + EXPECT_TRUE(profiler_add_marker( + "Text from main thread with stack", geckoprofiler::category::OTHER, + MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()), + geckoprofiler::markers::TextMarker{}, "")); + + std::thread registeredThread([]() { + AUTO_PROFILER_REGISTER_THREAD("Marker test sub-thread"); + // Marker in non-profiled thread won't be stored. + EXPECT_FALSE(profiler_add_marker( + "Text in registered thread with stack", geckoprofiler::category::OTHER, + MarkerStack::Capture(), geckoprofiler::markers::TextMarker{}, "")); + // Marker will be stored in main thread, with stack from registered thread. + EXPECT_TRUE(profiler_add_marker( + "Text from registered thread with stack", + geckoprofiler::category::OTHER, + MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()), + geckoprofiler::markers::TextMarker{}, "")); + }); + registeredThread.join(); + + std::thread unregisteredThread([]() { + // Marker in unregistered thread won't be stored. 
+ EXPECT_FALSE(profiler_add_marker("Text in unregistered thread with stack", + geckoprofiler::category::OTHER, + MarkerStack::Capture(), + geckoprofiler::markers::TextMarker{}, "")); + // Marker will be stored in main thread, but stack cannot be captured in an + // unregistered thread. + EXPECT_TRUE(profiler_add_marker( + "Text from unregistered thread with stack", + geckoprofiler::category::OTHER, + MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()), + geckoprofiler::markers::TextMarker{}, "")); + }); + unregisteredThread.join(); + + EXPECT_TRUE(profiler_add_marker("Tracing", geckoprofiler::category::OTHER, {}, + geckoprofiler::markers::Tracing{}, + "category")); + + EXPECT_TRUE(profiler_add_marker("Text", geckoprofiler::category::OTHER, {}, + geckoprofiler::markers::TextMarker{}, + "Text text")); + + // Ensure that we evaluate to false for markers with very long texts by + // testing against a ~3mb string. A string of this size should exceed the + // available buffer chunks (max: 2) that are available and be discarded. + EXPECT_FALSE(profiler_add_marker("Text", geckoprofiler::category::OTHER, {}, + geckoprofiler::markers::TextMarker{}, + std::string(3 * 1024 * 1024, 'x'))); + + EXPECT_TRUE(profiler_add_marker( + "MediaSample", geckoprofiler::category::OTHER, {}, + geckoprofiler::markers::MediaSampleMarker{}, 123, 456, 789)); + + SpliceableChunkedJSONWriter w{FailureLatchInfallibleSource::Singleton()}; + w.Start(); + EXPECT_TRUE(::profiler_stream_json_for_this_process(w).isOk()); + w.End(); + + EXPECT_FALSE(w.Failed()); + + UniquePtr profile = w.ChunkedWriteFunc().CopyData(); + ASSERT_TRUE(!!profile.get()); + + // Expected markers, in order. 
+ enum State { + S_tracing_event, + S_tracing_start, + S_tracing_end, + S_tracing_event_with_stack, + S_tracing_auto_tracing_start, + S_tracing_auto_tracing_end, + S_M1, + S_M3, + S_Markers2DefaultEmptyOptions, + S_Markers2DefaultWithOptions, + S_Markers2ExplicitDefaultEmptyOptions, + S_Markers2ExplicitDefaultWithOptions, + S_FirstMarker, + S_CustomMarker, + S_SpecialMarker, + S_NetworkMarkerPayload_start, + S_NetworkMarkerPayload_stop, + S_NetworkMarkerPayload_redirect_temporary, + S_NetworkMarkerPayload_redirect_permanent, + S_NetworkMarkerPayload_redirect_internal, + S_NetworkMarkerPayload_redirect_internal_sts, + S_NetworkMarkerPayload_private_browsing, + + S_TextWithStack, + S_TextToMTWithStack, + S_RegThread_TextToMTWithStack, + S_UnregThread_TextToMTWithStack, + + S_LAST, + } state = State(0); + + // These will be set when first read from S_FirstMarker, then + // compared in following markers. + // TODO: Compute these values from the timestamps. + double ts1Double = 0.0; + double ts2Double = 0.0; + + JSONOutputCheck(profile.get(), [&](const Json::Value& root) { + { + GET_JSON(threads, root["threads"], Array); + ASSERT_EQ(threads.size(), 1u); + + { + GET_JSON(thread0, threads[0], Object); + + // Keep a reference to the string table in this block, it will be used + // below. + GET_JSON(stringTable, thread0["stringTable"], Array); + ASSERT_TRUE(stringTable.isArray()); + + // Test the expected labels in the string table. 
+ bool foundEmpty = false; + bool foundOkstr1 = false; + bool foundOkstr2 = false; + const std::string okstr2Label = std::string("okstr2 ") + okstr2.get(); + bool foundTooLong = false; + for (const auto& s : stringTable) { + ASSERT_TRUE(s.isString()); + std::string sString = s.asString(); + if (sString.empty()) { + EXPECT_FALSE(foundEmpty); + foundEmpty = true; + } else if (sString == okstr1.get()) { + EXPECT_FALSE(foundOkstr1); + foundOkstr1 = true; + } else if (sString == okstr2Label) { + EXPECT_FALSE(foundOkstr2); + foundOkstr2 = true; + } else if (sString == longstrCut.get()) { + EXPECT_FALSE(foundTooLong); + foundTooLong = true; + } else { + EXPECT_NE(sString, longstr.get()); + } + } + EXPECT_TRUE(foundEmpty); + EXPECT_TRUE(foundOkstr1); + EXPECT_TRUE(foundOkstr2); + EXPECT_TRUE(foundTooLong); + + { + GET_JSON(markers, thread0["markers"], Object); + + { + GET_JSON(data, markers["data"], Array); + + for (const Json::Value& marker : data) { + // Name the indexes into the marker tuple: + // [name, startTime, endTime, phase, category, payload] + const unsigned int NAME = 0u; + const unsigned int START_TIME = 1u; + const unsigned int END_TIME = 2u; + const unsigned int PHASE = 3u; + const unsigned int CATEGORY = 4u; + const unsigned int PAYLOAD = 5u; + + const unsigned int PHASE_INSTANT = 0; + const unsigned int PHASE_INTERVAL = 1; + const unsigned int PHASE_START = 2; + const unsigned int PHASE_END = 3; + + const unsigned int SIZE_WITHOUT_PAYLOAD = 5u; + const unsigned int SIZE_WITH_PAYLOAD = 6u; + + ASSERT_TRUE(marker.isArray()); + // The payload is optional. + ASSERT_GE(marker.size(), SIZE_WITHOUT_PAYLOAD); + ASSERT_LE(marker.size(), SIZE_WITH_PAYLOAD); + + // root.threads[0].markers.data[i] is an array with 5 or 6 + // elements. 
+ + ASSERT_TRUE(marker[NAME].isUInt()); // name id + GET_JSON(name, stringTable[marker[NAME].asUInt()], String); + std::string nameString = name.asString(); + + EXPECT_TRUE(marker[START_TIME].isNumeric()); + EXPECT_TRUE(marker[END_TIME].isNumeric()); + EXPECT_TRUE(marker[PHASE].isUInt()); + EXPECT_TRUE(marker[PHASE].asUInt() < 4); + EXPECT_TRUE(marker[CATEGORY].isUInt()); + +# define EXPECT_TIMING_INSTANT \ + EXPECT_NE(marker[START_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[END_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INSTANT); +# define EXPECT_TIMING_INTERVAL \ + EXPECT_NE(marker[START_TIME].asDouble(), 0); \ + EXPECT_NE(marker[END_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INTERVAL); +# define EXPECT_TIMING_START \ + EXPECT_NE(marker[START_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[END_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_START); +# define EXPECT_TIMING_END \ + EXPECT_EQ(marker[START_TIME].asDouble(), 0); \ + EXPECT_NE(marker[END_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_END); + +# define EXPECT_TIMING_INSTANT_AT(t) \ + EXPECT_EQ(marker[START_TIME].asDouble(), t); \ + EXPECT_EQ(marker[END_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INSTANT); +# define EXPECT_TIMING_INTERVAL_AT(start, end) \ + EXPECT_EQ(marker[START_TIME].asDouble(), start); \ + EXPECT_EQ(marker[END_TIME].asDouble(), end); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INTERVAL); +# define EXPECT_TIMING_START_AT(start) \ + EXPECT_EQ(marker[START_TIME].asDouble(), start); \ + EXPECT_EQ(marker[END_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_START); +# define EXPECT_TIMING_END_AT(end) \ + EXPECT_EQ(marker[START_TIME].asDouble(), 0); \ + EXPECT_EQ(marker[END_TIME].asDouble(), end); \ + EXPECT_EQ(marker[PHASE].asUInt(), PHASE_END); + + if (marker.size() == SIZE_WITHOUT_PAYLOAD) { + // root.threads[0].markers.data[i] is an array with 5 elements, + // so there is 
no payload. + if (nameString == "M1") { + ASSERT_EQ(state, S_M1); + state = State(state + 1); + } else if (nameString == "M3") { + ASSERT_EQ(state, S_M3); + state = State(state + 1); + } else if (nameString == + "default-templated markers 2.0 with empty options") { + EXPECT_EQ(state, S_Markers2DefaultEmptyOptions); + state = State(S_Markers2DefaultEmptyOptions + 1); +// TODO: Re-enable this when bug 1646714 lands, and check for stack. +# if 0 + } else if (nameString == + "default-templated markers 2.0 with option") { + EXPECT_EQ(state, S_Markers2DefaultWithOptions); + state = State(S_Markers2DefaultWithOptions + 1); +# endif + } else if (nameString == + "explicitly-default-templated markers 2.0 with " + "empty " + "options") { + EXPECT_EQ(state, S_Markers2ExplicitDefaultEmptyOptions); + state = State(S_Markers2ExplicitDefaultEmptyOptions + 1); + } else if (nameString == + "explicitly-default-templated markers 2.0 with " + "option") { + EXPECT_EQ(state, S_Markers2ExplicitDefaultWithOptions); + state = State(S_Markers2ExplicitDefaultWithOptions + 1); + } + } else { + // root.threads[0].markers.data[i] is an array with 6 elements, + // so there is a payload. + GET_JSON(payload, marker[PAYLOAD], Object); + + // root.threads[0].markers.data[i][PAYLOAD] is an object + // (payload). + + // It should at least have a "type" string. 
+ GET_JSON(type, payload["type"], String); + std::string typeString = type.asString(); + + if (nameString == "tracing event") { + EXPECT_EQ(state, S_tracing_event); + state = State(S_tracing_event + 1); + EXPECT_EQ(typeString, "tracing"); + EXPECT_TIMING_INSTANT; + EXPECT_EQ_JSON(payload["category"], String, "A"); + EXPECT_TRUE(payload["stack"].isNull()); + + } else if (nameString == "tracing start") { + EXPECT_EQ(state, S_tracing_start); + state = State(S_tracing_start + 1); + EXPECT_EQ(typeString, "tracing"); + EXPECT_TIMING_START; + EXPECT_EQ_JSON(payload["category"], String, "A"); + EXPECT_TRUE(payload["stack"].isNull()); + + } else if (nameString == "tracing end") { + EXPECT_EQ(state, S_tracing_end); + state = State(S_tracing_end + 1); + EXPECT_EQ(typeString, "tracing"); + EXPECT_TIMING_END; + EXPECT_EQ_JSON(payload["category"], String, "A"); + EXPECT_TRUE(payload["stack"].isNull()); + + } else if (nameString == "tracing event with stack") { + EXPECT_EQ(state, S_tracing_event_with_stack); + state = State(S_tracing_event_with_stack + 1); + EXPECT_EQ(typeString, "tracing"); + EXPECT_TIMING_INSTANT; + EXPECT_EQ_JSON(payload["category"], String, "B"); + EXPECT_TRUE(payload["stack"].isObject()); + + } else if (nameString == "auto tracing") { + switch (state) { + case S_tracing_auto_tracing_start: + state = State(S_tracing_auto_tracing_start + 1); + EXPECT_EQ(typeString, "tracing"); + EXPECT_TIMING_START; + EXPECT_EQ_JSON(payload["category"], String, "C"); + EXPECT_TRUE(payload["stack"].isNull()); + break; + case S_tracing_auto_tracing_end: + state = State(S_tracing_auto_tracing_end + 1); + EXPECT_EQ(typeString, "tracing"); + EXPECT_TIMING_END; + EXPECT_EQ_JSON(payload["category"], String, "C"); + ASSERT_TRUE(payload["stack"].isNull()); + break; + default: + EXPECT_TRUE(state == S_tracing_auto_tracing_start || + state == S_tracing_auto_tracing_end); + break; + } + + } else if (nameString == + "default-templated markers 2.0 with option") { + // TODO: Remove this when 
bug 1646714 lands. + EXPECT_EQ(state, S_Markers2DefaultWithOptions); + state = State(S_Markers2DefaultWithOptions + 1); + EXPECT_EQ(typeString, "NoPayloadUserData"); + EXPECT_FALSE(payload["stack"].isNull()); + + } else if (nameString == "FirstMarker") { + // Record start and end times, to compare with timestamps in + // following markers. + EXPECT_EQ(state, S_FirstMarker); + ts1Double = marker[START_TIME].asDouble(); + ts2Double = marker[END_TIME].asDouble(); + state = State(S_FirstMarker + 1); + EXPECT_EQ(typeString, "Text"); + EXPECT_EQ_JSON(payload["name"], String, "First Marker"); + + } else if (nameString == "Gtest custom marker") { + EXPECT_EQ(state, S_CustomMarker); + state = State(S_CustomMarker + 1); + EXPECT_EQ(typeString, "markers-gtest"); + EXPECT_EQ(payload.size(), 1u + 9u); + EXPECT_TRUE(payload["null"].isNull()); + EXPECT_EQ_JSON(payload["bool-false"], Bool, false); + EXPECT_EQ_JSON(payload["bool-true"], Bool, true); + EXPECT_EQ_JSON(payload["int"], Int64, 42); + EXPECT_EQ_JSON(payload["double"], Double, 43.0); + EXPECT_EQ_JSON(payload["text"], String, "gtest text"); + // Unique strings can be fetched from the string table. + ASSERT_TRUE(payload["unique text"].isUInt()); + auto textIndex = payload["unique text"].asUInt(); + GET_JSON(uniqueText, stringTable[textIndex], String); + ASSERT_TRUE(uniqueText.isString()); + ASSERT_EQ(uniqueText.asString(), "gtest unique text"); + // The duplicate unique text should have the exact same index. 
+ EXPECT_EQ_JSON(payload["unique text again"], UInt, textIndex); + EXPECT_EQ_JSON(payload["time"], Double, ts1Double); + + } else if (nameString == "Gtest special marker") { + EXPECT_EQ(state, S_SpecialMarker); + state = State(S_SpecialMarker + 1); + EXPECT_EQ(typeString, "markers-gtest-special"); + EXPECT_EQ(payload.size(), 1u) << "Only 'type' in the payload"; + + } else if (nameString == "Load 1: http://mozilla.org/") { + EXPECT_EQ(state, S_NetworkMarkerPayload_start); + state = State(S_NetworkMarkerPayload_start + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 1); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Hit"); + EXPECT_TRUE(payload["isPrivateBrowsing"].isNull()); + EXPECT_TRUE(payload["RedirectURI"].isNull()); + EXPECT_TRUE(payload["redirectType"].isNull()); + EXPECT_TRUE(payload["isHttpToHttpsRedirect"].isNull()); + EXPECT_TRUE(payload["redirectId"].isNull()); + EXPECT_TRUE(payload["contentType"].isNull()); + + } else if (nameString == "Load 2: http://mozilla.org/") { + EXPECT_EQ(state, S_NetworkMarkerPayload_stop); + state = State(S_NetworkMarkerPayload_stop + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 2); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Unresolved"); + EXPECT_TRUE(payload["isPrivateBrowsing"].isNull()); + 
EXPECT_TRUE(payload["RedirectURI"].isNull()); + EXPECT_TRUE(payload["redirectType"].isNull()); + EXPECT_TRUE(payload["isHttpToHttpsRedirect"].isNull()); + EXPECT_TRUE(payload["redirectId"].isNull()); + EXPECT_EQ_JSON(payload["contentType"], String, "text/html"); + + } else if (nameString == "Load 3: http://mozilla.org/") { + EXPECT_EQ(state, S_NetworkMarkerPayload_redirect_temporary); + state = State(S_NetworkMarkerPayload_redirect_temporary + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 3); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Unresolved"); + EXPECT_TRUE(payload["isPrivateBrowsing"].isNull()); + EXPECT_EQ_JSON(payload["RedirectURI"], String, + "http://example.com/"); + EXPECT_EQ_JSON(payload["redirectType"], String, "Temporary"); + EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, false); + EXPECT_EQ_JSON(payload["redirectId"], Int64, 103); + EXPECT_TRUE(payload["contentType"].isNull()); + + } else if (nameString == "Load 4: http://mozilla.org/") { + EXPECT_EQ(state, S_NetworkMarkerPayload_redirect_permanent); + state = State(S_NetworkMarkerPayload_redirect_permanent + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 4); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Unresolved"); + EXPECT_TRUE(payload["isPrivateBrowsing"].isNull()); + 
EXPECT_EQ_JSON(payload["RedirectURI"], String, + "http://example.com/"); + EXPECT_EQ_JSON(payload["redirectType"], String, "Permanent"); + EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, false); + EXPECT_EQ_JSON(payload["redirectId"], Int64, 104); + EXPECT_TRUE(payload["contentType"].isNull()); + + } else if (nameString == "Load 5: http://mozilla.org/") { + EXPECT_EQ(state, S_NetworkMarkerPayload_redirect_internal); + state = State(S_NetworkMarkerPayload_redirect_internal + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 5); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Unresolved"); + EXPECT_TRUE(payload["isPrivateBrowsing"].isNull()); + EXPECT_EQ_JSON(payload["RedirectURI"], String, + "http://example.com/"); + EXPECT_EQ_JSON(payload["redirectType"], String, "Internal"); + EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, false); + EXPECT_EQ_JSON(payload["redirectId"], Int64, 105); + EXPECT_TRUE(payload["contentType"].isNull()); + + } else if (nameString == "Load 6: http://mozilla.org/") { + EXPECT_EQ(state, + S_NetworkMarkerPayload_redirect_internal_sts); + state = + State(S_NetworkMarkerPayload_redirect_internal_sts + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 6); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Unresolved"); + 
EXPECT_TRUE(payload["isPrivateBrowsing"].isNull()); + EXPECT_EQ_JSON(payload["RedirectURI"], String, + "http://example.com/"); + EXPECT_EQ_JSON(payload["redirectType"], String, "Internal"); + EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, true); + EXPECT_EQ_JSON(payload["redirectId"], Int64, 106); + EXPECT_TRUE(payload["contentType"].isNull()); + + } else if (nameString == "Load 7: http://mozilla.org/") { + EXPECT_EQ(state, S_NetworkMarkerPayload_private_browsing); + state = State(S_NetworkMarkerPayload_private_browsing + 1); + EXPECT_EQ(typeString, "Network"); + EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double); + EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double); + EXPECT_EQ_JSON(payload["id"], Int64, 7); + EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/"); + EXPECT_EQ_JSON(payload["requestMethod"], String, "GET"); + EXPECT_EQ_JSON(payload["pri"], Int64, 34); + EXPECT_EQ_JSON(payload["count"], Int64, 56); + EXPECT_EQ_JSON(payload["cache"], String, "Unresolved"); + EXPECT_EQ_JSON(payload["isPrivateBrowsing"], Bool, true); + EXPECT_TRUE(payload["RedirectURI"].isNull()); + EXPECT_TRUE(payload["redirectType"].isNull()); + EXPECT_TRUE(payload["isHttpToHttpsRedirect"].isNull()); + EXPECT_TRUE(payload["redirectId"].isNull()); + EXPECT_TRUE(payload["contentType"].isNull()); + } else if (nameString == "Text in main thread with stack") { + EXPECT_EQ(state, S_TextWithStack); + state = State(S_TextWithStack + 1); + EXPECT_EQ(typeString, "Text"); + EXPECT_FALSE(payload["stack"].isNull()); + EXPECT_TIMING_INTERVAL_AT(ts1Double, ts2Double); + EXPECT_EQ_JSON(payload["name"], String, ""); + + } else if (nameString == "Text from main thread with stack") { + EXPECT_EQ(state, S_TextToMTWithStack); + state = State(S_TextToMTWithStack + 1); + EXPECT_EQ(typeString, "Text"); + EXPECT_FALSE(payload["stack"].isNull()); + EXPECT_EQ_JSON(payload["name"], String, ""); + + } else if (nameString == + "Text in registered thread with stack") { + ADD_FAILURE() + << 
"Unexpected 'Text in registered thread with stack'"; + + } else if (nameString == + "Text from registered thread with stack") { + EXPECT_EQ(state, S_RegThread_TextToMTWithStack); + state = State(S_RegThread_TextToMTWithStack + 1); + EXPECT_EQ(typeString, "Text"); + EXPECT_FALSE(payload["stack"].isNull()); + EXPECT_EQ_JSON(payload["name"], String, ""); + + } else if (nameString == + "Text in unregistered thread with stack") { + ADD_FAILURE() + << "Unexpected 'Text in unregistered thread with stack'"; + + } else if (nameString == + "Text from unregistered thread with stack") { + EXPECT_EQ(state, S_UnregThread_TextToMTWithStack); + state = State(S_UnregThread_TextToMTWithStack + 1); + EXPECT_EQ(typeString, "Text"); + EXPECT_TRUE(payload["stack"].isNull()); + EXPECT_EQ_JSON(payload["name"], String, ""); + } + } // marker with payload + } // for (marker : data) + } // markers.data + } // markers + } // thread0 + } // threads + // We should have read all expected markers. + EXPECT_EQ(state, S_LAST); + + { + GET_JSON(meta, root["meta"], Object); + + { + GET_JSON(markerSchema, meta["markerSchema"], Array); + + std::set testedSchemaNames; + + for (const Json::Value& schema : markerSchema) { + GET_JSON(name, schema["name"], String); + const std::string nameString = name.asString(); + + GET_JSON(display, schema["display"], Array); + + GET_JSON(data, schema["data"], Array); + + EXPECT_TRUE( + testedSchemaNames + .insert(std::string(nameString.data(), nameString.size())) + .second) + << "Each schema name should be unique (inserted once in the set)"; + + if (nameString == "Text") { + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 1u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "name"); + EXPECT_EQ_JSON(data[0u]["label"], String, "Details"); + EXPECT_EQ_JSON(data[0u]["format"], String, "string"); + + } else if (nameString == 
"NoPayloadUserData") { + // TODO: Remove this when bug 1646714 lands. + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 0u); + + } else if (nameString == "FileIO") { + // These are defined in ProfilerIOInterposeObserver.cpp + + } else if (nameString == "tracing") { + EXPECT_EQ(display.size(), 3u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + EXPECT_EQ(display[2u].asString(), "timeline-overview"); + + ASSERT_EQ(data.size(), 1u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "category"); + EXPECT_EQ_JSON(data[0u]["label"], String, "Type"); + EXPECT_EQ_JSON(data[0u]["format"], String, "string"); + + } else if (nameString == "BHR-detected hang") { + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 0u); + + } else if (nameString == "MainThreadLongTask") { + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 1u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "category"); + EXPECT_EQ_JSON(data[0u]["label"], String, "Type"); + EXPECT_EQ_JSON(data[0u]["format"], String, "string"); + + } else if (nameString == "Log") { + EXPECT_EQ(display.size(), 1u); + EXPECT_EQ(display[0u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 2u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "module"); + EXPECT_EQ_JSON(data[0u]["label"], String, "Module"); + EXPECT_EQ_JSON(data[0u]["format"], String, "string"); + + ASSERT_TRUE(data[1u].isObject()); + EXPECT_EQ_JSON(data[1u]["key"], String, "name"); + EXPECT_EQ_JSON(data[1u]["label"], String, "Name"); + EXPECT_EQ_JSON(data[1u]["format"], 
String, "string"); + + } else if (nameString == "MediaSample") { + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 3u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "sampleStartTimeUs"); + EXPECT_EQ_JSON(data[0u]["label"], String, "Sample start time"); + EXPECT_EQ_JSON(data[0u]["format"], String, "microseconds"); + + ASSERT_TRUE(data[1u].isObject()); + EXPECT_EQ_JSON(data[1u]["key"], String, "sampleEndTimeUs"); + EXPECT_EQ_JSON(data[1u]["label"], String, "Sample end time"); + EXPECT_EQ_JSON(data[1u]["format"], String, "microseconds"); + + ASSERT_TRUE(data[2u].isObject()); + EXPECT_EQ_JSON(data[2u]["key"], String, "queueLength"); + EXPECT_EQ_JSON(data[2u]["label"], String, "Queue length"); + EXPECT_EQ_JSON(data[2u]["format"], String, "integer"); + + } else if (nameString == "VideoFallingBehind") { + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 2u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "videoFrameStartTimeUs"); + EXPECT_EQ_JSON(data[0u]["label"], String, "Video frame start time"); + EXPECT_EQ_JSON(data[0u]["format"], String, "microseconds"); + + ASSERT_TRUE(data[1u].isObject()); + EXPECT_EQ_JSON(data[1u]["key"], String, "mediaCurrentTimeUs"); + EXPECT_EQ_JSON(data[1u]["label"], String, "Media current time"); + EXPECT_EQ_JSON(data[1u]["format"], String, "microseconds"); + + } else if (nameString == "Budget") { + EXPECT_EQ(display.size(), 2u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); + + ASSERT_EQ(data.size(), 0u); + + } else if (nameString == "markers-gtest") { + EXPECT_EQ(display.size(), 7u); + EXPECT_EQ(display[0u].asString(), "marker-chart"); + EXPECT_EQ(display[1u].asString(), "marker-table"); 
+ EXPECT_EQ(display[2u].asString(), "timeline-overview"); + EXPECT_EQ(display[3u].asString(), "timeline-memory"); + EXPECT_EQ(display[4u].asString(), "timeline-ipc"); + EXPECT_EQ(display[5u].asString(), "timeline-fileio"); + EXPECT_EQ(display[6u].asString(), "stack-chart"); + + EXPECT_EQ_JSON(schema["chartLabel"], String, "chart label"); + EXPECT_EQ_JSON(schema["tooltipLabel"], String, "tooltip label"); + EXPECT_EQ_JSON(schema["tableLabel"], String, "table label"); + + ASSERT_EQ(data.size(), 14u); + + ASSERT_TRUE(data[0u].isObject()); + EXPECT_EQ_JSON(data[0u]["key"], String, "key with url"); + EXPECT_TRUE(data[0u]["label"].isNull()); + EXPECT_EQ_JSON(data[0u]["format"], String, "url"); + EXPECT_TRUE(data[0u]["searchable"].isNull()); + + ASSERT_TRUE(data[1u].isObject()); + EXPECT_EQ_JSON(data[1u]["key"], String, "key with label filePath"); + EXPECT_EQ_JSON(data[1u]["label"], String, "label filePath"); + EXPECT_EQ_JSON(data[1u]["format"], String, "file-path"); + EXPECT_TRUE(data[1u]["searchable"].isNull()); + + ASSERT_TRUE(data[2u].isObject()); + EXPECT_EQ_JSON(data[2u]["key"], String, + "key with string not-searchable"); + EXPECT_TRUE(data[2u]["label"].isNull()); + EXPECT_EQ_JSON(data[2u]["format"], String, "string"); + EXPECT_EQ_JSON(data[2u]["searchable"], Bool, false); + + ASSERT_TRUE(data[3u].isObject()); + EXPECT_EQ_JSON(data[3u]["key"], String, + "key with label duration searchable"); + EXPECT_TRUE(data[3u]["label duration"].isNull()); + EXPECT_EQ_JSON(data[3u]["format"], String, "duration"); + EXPECT_EQ_JSON(data[3u]["searchable"], Bool, true); + + ASSERT_TRUE(data[4u].isObject()); + EXPECT_EQ_JSON(data[4u]["key"], String, "key with time"); + EXPECT_TRUE(data[4u]["label"].isNull()); + EXPECT_EQ_JSON(data[4u]["format"], String, "time"); + EXPECT_TRUE(data[4u]["searchable"].isNull()); + + ASSERT_TRUE(data[5u].isObject()); + EXPECT_EQ_JSON(data[5u]["key"], String, "key with seconds"); + EXPECT_TRUE(data[5u]["label"].isNull()); + 
EXPECT_EQ_JSON(data[5u]["format"], String, "seconds"); + EXPECT_TRUE(data[5u]["searchable"].isNull()); + + ASSERT_TRUE(data[6u].isObject()); + EXPECT_EQ_JSON(data[6u]["key"], String, "key with milliseconds"); + EXPECT_TRUE(data[6u]["label"].isNull()); + EXPECT_EQ_JSON(data[6u]["format"], String, "milliseconds"); + EXPECT_TRUE(data[6u]["searchable"].isNull()); + + ASSERT_TRUE(data[7u].isObject()); + EXPECT_EQ_JSON(data[7u]["key"], String, "key with microseconds"); + EXPECT_TRUE(data[7u]["label"].isNull()); + EXPECT_EQ_JSON(data[7u]["format"], String, "microseconds"); + EXPECT_TRUE(data[7u]["searchable"].isNull()); + + ASSERT_TRUE(data[8u].isObject()); + EXPECT_EQ_JSON(data[8u]["key"], String, "key with nanoseconds"); + EXPECT_TRUE(data[8u]["label"].isNull()); + EXPECT_EQ_JSON(data[8u]["format"], String, "nanoseconds"); + EXPECT_TRUE(data[8u]["searchable"].isNull()); + + ASSERT_TRUE(data[9u].isObject()); + EXPECT_EQ_JSON(data[9u]["key"], String, "key with bytes"); + EXPECT_TRUE(data[9u]["label"].isNull()); + EXPECT_EQ_JSON(data[9u]["format"], String, "bytes"); + EXPECT_TRUE(data[9u]["searchable"].isNull()); + + ASSERT_TRUE(data[10u].isObject()); + EXPECT_EQ_JSON(data[10u]["key"], String, "key with percentage"); + EXPECT_TRUE(data[10u]["label"].isNull()); + EXPECT_EQ_JSON(data[10u]["format"], String, "percentage"); + EXPECT_TRUE(data[10u]["searchable"].isNull()); + + ASSERT_TRUE(data[11u].isObject()); + EXPECT_EQ_JSON(data[11u]["key"], String, "key with integer"); + EXPECT_TRUE(data[11u]["label"].isNull()); + EXPECT_EQ_JSON(data[11u]["format"], String, "integer"); + EXPECT_TRUE(data[11u]["searchable"].isNull()); + + ASSERT_TRUE(data[12u].isObject()); + EXPECT_EQ_JSON(data[12u]["key"], String, "key with decimal"); + EXPECT_TRUE(data[12u]["label"].isNull()); + EXPECT_EQ_JSON(data[12u]["format"], String, "decimal"); + EXPECT_TRUE(data[12u]["searchable"].isNull()); + + ASSERT_TRUE(data[13u].isObject()); + EXPECT_EQ_JSON(data[13u]["label"], String, "static label"); + 
EXPECT_EQ_JSON(data[13u]["value"], String, "static value"); + + } else if (nameString == "markers-gtest-special") { + EXPECT_EQ(display.size(), 0u); + ASSERT_EQ(data.size(), 0u); + + } else if (nameString == "markers-gtest-unused") { + ADD_FAILURE() << "Schema for GtestUnusedMarker should not be here"; + + } else { + printf("FYI: Unknown marker schema '%s'\n", nameString.c_str()); + } + } + + // Check that we've got all expected schema. + EXPECT_TRUE(testedSchemaNames.find("Text") != testedSchemaNames.end()); + EXPECT_TRUE(testedSchemaNames.find("tracing") != + testedSchemaNames.end()); + EXPECT_TRUE(testedSchemaNames.find("MediaSample") != + testedSchemaNames.end()); + } // markerSchema + } // meta + }); + + Maybe info = profiler_get_buffer_info(); + EXPECT_TRUE(info.isSome()); + printf("Profiler buffer range: %llu .. %llu (%llu bytes)\n", + static_cast(info->mRangeStart), + static_cast(info->mRangeEnd), + // sizeof(ProfileBufferEntry) == 9 + (static_cast(info->mRangeEnd) - + static_cast(info->mRangeStart)) * + 9); + printf("Stats: min(us) .. mean(us) .. max(us) [count]\n"); + printf("- Intervals: %7.1f .. %7.1f .. %7.1f [%u]\n", + info->mIntervalsUs.min, info->mIntervalsUs.sum / info->mIntervalsUs.n, + info->mIntervalsUs.max, info->mIntervalsUs.n); + printf("- Overheads: %7.1f .. %7.1f .. %7.1f [%u]\n", + info->mOverheadsUs.min, info->mOverheadsUs.sum / info->mOverheadsUs.n, + info->mOverheadsUs.max, info->mOverheadsUs.n); + printf(" - Locking: %7.1f .. %7.1f .. %7.1f [%u]\n", + info->mLockingsUs.min, info->mLockingsUs.sum / info->mLockingsUs.n, + info->mLockingsUs.max, info->mLockingsUs.n); + printf(" - Clearning: %7.1f .. %7.1f .. %7.1f [%u]\n", + info->mCleaningsUs.min, info->mCleaningsUs.sum / info->mCleaningsUs.n, + info->mCleaningsUs.max, info->mCleaningsUs.n); + printf(" - Counters: %7.1f .. %7.1f .. 
%7.1f [%u]\n", + info->mCountersUs.min, info->mCountersUs.sum / info->mCountersUs.n, + info->mCountersUs.max, info->mCountersUs.n); + printf(" - Threads: %7.1f .. %7.1f .. %7.1f [%u]\n", + info->mThreadsUs.min, info->mThreadsUs.sum / info->mThreadsUs.n, + info->mThreadsUs.max, info->mThreadsUs.n); + + profiler_stop(); + + // Try to add markers while the profiler is stopped. + PROFILER_MARKER_UNTYPED("marker after profiler_stop", OTHER); + + // Warning: this could be racy + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + // This last marker shouldn't get streamed. + SpliceableChunkedJSONWriter w2{FailureLatchInfallibleSource::Singleton()}; + w2.Start(); + EXPECT_TRUE(::profiler_stream_json_for_this_process(w2).isOk()); + w2.End(); + EXPECT_FALSE(w2.Failed()); + UniquePtr profile2 = w2.ChunkedWriteFunc().CopyData(); + ASSERT_TRUE(!!profile2.get()); + EXPECT_TRUE( + std::string_view(profile2.get()).find("marker after profiler_stop") == + std::string_view::npos); + + profiler_stop(); +} + +# define COUNTER_NAME "TestCounter" +# define COUNTER_DESCRIPTION "Test of counters in profiles" +# define COUNTER_NAME2 "Counter2" +# define COUNTER_DESCRIPTION2 "Second Test of counters in profiles" + +PROFILER_DEFINE_COUNT_TOTAL(TestCounter, COUNTER_NAME, COUNTER_DESCRIPTION); +PROFILER_DEFINE_COUNT_TOTAL(TestCounter2, COUNTER_NAME2, COUNTER_DESCRIPTION2); + +TEST(GeckoProfiler, Counters) +{ + uint32_t features = 0; + const char* filters[] = {"GeckoMain"}; + + // We will record some counter values, and check that they're present (and no + // other) when expected. 
+ + struct NumberAndCount { + uint64_t mNumber; + int64_t mCount; + }; + + int64_t testCounters[] = {10, 7, -17}; + NumberAndCount expectedTestCounters[] = {{1u, 10}, {0u, 0}, {1u, 7}, + {0u, 0}, {0u, 0}, {1u, -17}, + {0u, 0}, {0u, 0}}; + constexpr size_t expectedTestCountersCount = + MOZ_ARRAY_LENGTH(expectedTestCounters); + + bool expectCounter2 = false; + int64_t testCounters2[] = {10}; + NumberAndCount expectedTestCounters2[] = {{1u, 10}, {0u, 0}}; + constexpr size_t expectedTestCounters2Count = + MOZ_ARRAY_LENGTH(expectedTestCounters2); + + auto checkCountersInJSON = [&](const Json::Value& aRoot) { + size_t nextExpectedTestCounter = 0u; + size_t nextExpectedTestCounter2 = 0u; + + GET_JSON(counters, aRoot["counters"], Array); + for (const Json::Value& counter : counters) { + ASSERT_TRUE(counter.isObject()); + GET_JSON_VALUE(name, counter["name"], String); + if (name == "TestCounter") { + EXPECT_EQ_JSON(counter["category"], String, COUNTER_NAME); + EXPECT_EQ_JSON(counter["description"], String, COUNTER_DESCRIPTION); + GET_JSON(sampleGroups, counter["sample_groups"], Array); + for (const Json::Value& sampleGroup : sampleGroups) { + ASSERT_TRUE(sampleGroup.isObject()); + EXPECT_EQ_JSON(sampleGroup["id"], UInt, 0u); + + GET_JSON(samples, sampleGroup["samples"], Object); + GET_JSON(samplesSchema, samples["schema"], Object); + EXPECT_GE(samplesSchema.size(), 3u); + GET_JSON_VALUE(samplesNumber, samplesSchema["number"], UInt); + GET_JSON_VALUE(samplesCount, samplesSchema["count"], UInt); + GET_JSON(samplesData, samples["data"], Array); + for (const Json::Value& sample : samplesData) { + ASSERT_TRUE(sample.isArray()); + ASSERT_LT(nextExpectedTestCounter, expectedTestCountersCount); + EXPECT_EQ_JSON( + sample[samplesNumber], UInt64, + expectedTestCounters[nextExpectedTestCounter].mNumber); + EXPECT_EQ_JSON( + sample[samplesCount], Int64, + expectedTestCounters[nextExpectedTestCounter].mCount); + ++nextExpectedTestCounter; + } + } + } else if (name == "TestCounter2") { + 
EXPECT_TRUE(expectCounter2); + + EXPECT_EQ_JSON(counter["category"], String, COUNTER_NAME2); + EXPECT_EQ_JSON(counter["description"], String, COUNTER_DESCRIPTION2); + GET_JSON(sampleGroups, counter["sample_groups"], Array); + for (const Json::Value& sampleGroup : sampleGroups) { + ASSERT_TRUE(sampleGroup.isObject()); + EXPECT_EQ_JSON(sampleGroup["id"], UInt, 0u); + + GET_JSON(samples, sampleGroup["samples"], Object); + GET_JSON(samplesSchema, samples["schema"], Object); + EXPECT_GE(samplesSchema.size(), 3u); + GET_JSON_VALUE(samplesNumber, samplesSchema["number"], UInt); + GET_JSON_VALUE(samplesCount, samplesSchema["count"], UInt); + GET_JSON(samplesData, samples["data"], Array); + for (const Json::Value& sample : samplesData) { + ASSERT_TRUE(sample.isArray()); + ASSERT_LT(nextExpectedTestCounter2, expectedTestCounters2Count); + EXPECT_EQ_JSON( + sample[samplesNumber], UInt64, + expectedTestCounters2[nextExpectedTestCounter2].mNumber); + EXPECT_EQ_JSON( + sample[samplesCount], Int64, + expectedTestCounters2[nextExpectedTestCounter2].mCount); + ++nextExpectedTestCounter2; + } + } + } + } + + EXPECT_EQ(nextExpectedTestCounter, expectedTestCountersCount); + if (expectCounter2) { + EXPECT_EQ(nextExpectedTestCounter2, expectedTestCounters2Count); + } + }; + + // Inactive -> Active + profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0); + + // Output all "TestCounter"s, with increasing delays (to test different + // number of counter samplings). + int samplingWaits = 2; + for (int64_t counter : testCounters) { + AUTO_PROFILER_COUNT_TOTAL(TestCounter, counter); + for (int i = 0; i < samplingWaits; ++i) { + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + } + ++samplingWaits; + } + + // Verify we got "TestCounter" in the output, but not "TestCounter2" yet. 
+ UniquePtr profile = profiler_get_profile(); + JSONOutputCheck(profile.get(), checkCountersInJSON); + + // Now introduce TestCounter2. + expectCounter2 = true; + for (int64_t counter2 : testCounters2) { + AUTO_PROFILER_COUNT_TOTAL(TestCounter2, counter2); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + } + + // Verify we got both "TestCounter" and "TestCounter2" in the output. + profile = profiler_get_profile(); + JSONOutputCheck(profile.get(), checkCountersInJSON); + + profiler_stop(); +} + +TEST(GeckoProfiler, Time) +{ + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + double t1 = profiler_time(); + double t2 = profiler_time(); + ASSERT_TRUE(t1 <= t2); + + // profiler_start() restarts the timer used by profiler_time(). + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + double t3 = profiler_time(); + double t4 = profiler_time(); + ASSERT_TRUE(t3 <= t4); + + profiler_stop(); + + double t5 = profiler_time(); + double t6 = profiler_time(); + ASSERT_TRUE(t4 <= t5 && t1 <= t6); +} + +TEST(GeckoProfiler, GetProfile) +{ + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + ASSERT_TRUE(!profiler_get_profile()); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + mozilla::Maybe activeFeatures = profiler_features_if_active(); + ASSERT_TRUE(activeFeatures.isSome()); + // Not all platforms support stack-walking. 
+ const bool hasStackWalk = ProfilerFeature::HasStackWalk(*activeFeatures); + + UniquePtr profile = profiler_get_profile(); + JSONOutputCheck(profile.get(), [&](const Json::Value& aRoot) { + GET_JSON(meta, aRoot["meta"], Object); + { + GET_JSON(configuration, meta["configuration"], Object); + { + GET_JSON(features, configuration["features"], Array); + { + EXPECT_EQ(features.size(), (hasStackWalk ? 1u : 0u)); + if (hasStackWalk) { + EXPECT_JSON_ARRAY_CONTAINS(features, String, "stackwalk"); + } + } + GET_JSON(threads, configuration["threads"], Array); + { + EXPECT_EQ(threads.size(), 1u); + EXPECT_JSON_ARRAY_CONTAINS(threads, String, "GeckoMain"); + } + } + } + }); + + profiler_stop(); + + ASSERT_TRUE(!profiler_get_profile()); +} + +TEST(GeckoProfiler, StreamJSONForThisProcess) +{ + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + SpliceableChunkedJSONWriter w{FailureLatchInfallibleSource::Singleton()}; + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Fallible()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure()); + MOZ_RELEASE_ASSERT(&w.ChunkedWriteFunc().SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + MOZ_RELEASE_ASSERT( + &std::as_const(w.ChunkedWriteFunc()).SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + MOZ_RELEASE_ASSERT(!w.Fallible()); + MOZ_RELEASE_ASSERT(!w.Failed()); + MOZ_RELEASE_ASSERT(!w.GetFailure()); + MOZ_RELEASE_ASSERT(&w.SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + MOZ_RELEASE_ASSERT(&std::as_const(w).SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + + ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure()); + MOZ_RELEASE_ASSERT(!w.Failed()); + MOZ_RELEASE_ASSERT(!w.GetFailure()); + + 
profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + w.Start(); + ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isOk()); + w.End(); + + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure()); + MOZ_RELEASE_ASSERT(!w.Failed()); + MOZ_RELEASE_ASSERT(!w.GetFailure()); + + UniquePtr profile = w.ChunkedWriteFunc().CopyData(); + + JSONOutputCheck(profile.get(), [](const Json::Value&) {}); + + profiler_stop(); + + ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr()); +} + +// Internal version of profiler_stream_json_for_this_process, which allows being +// called from a non-main thread of the parent process, at the risk of getting +// an incomplete profile. +ProfilerResult +do_profiler_stream_json_for_this_process( + SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + +TEST(GeckoProfiler, StreamJSONForThisProcessThreaded) +{ + // Same as the previous test, but calling some things on background threads. 
+ nsCOMPtr thread; + nsresult rv = NS_NewNamedThread("GeckoProfGTest", getter_AddRefs(thread)); + ASSERT_NS_SUCCEEDED(rv); + + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + SpliceableChunkedJSONWriter w{FailureLatchInfallibleSource::Singleton()}; + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Fallible()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure()); + MOZ_RELEASE_ASSERT(&w.ChunkedWriteFunc().SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + MOZ_RELEASE_ASSERT( + &std::as_const(w.ChunkedWriteFunc()).SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + MOZ_RELEASE_ASSERT(!w.Fallible()); + MOZ_RELEASE_ASSERT(!w.Failed()); + MOZ_RELEASE_ASSERT(!w.GetFailure()); + MOZ_RELEASE_ASSERT(&w.SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + MOZ_RELEASE_ASSERT(&std::as_const(w).SourceFailureLatch() == + &mozilla::FailureLatchInfallibleSource::Singleton()); + + ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure()); + MOZ_RELEASE_ASSERT(!w.Failed()); + MOZ_RELEASE_ASSERT(!w.GetFailure()); + + // Start the profiler on the main thread. + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + // Call profiler_stream_json_for_this_process on a background thread. 
+ NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody"_ns, + thread, + NS_NewRunnableFunction( + "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody", + [&]() { + w.Start(); + ASSERT_TRUE(::do_profiler_stream_json_for_this_process( + w, /* double aSinceTime */ 0.0, + /* bool aIsShuttingDown */ false, + /* ProfilerCodeAddressService* aService */ nullptr, + mozilla::ProgressLogger{}) + .isOk()); + w.End(); + })); + + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed()); + MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure()); + MOZ_RELEASE_ASSERT(!w.Failed()); + MOZ_RELEASE_ASSERT(!w.GetFailure()); + + UniquePtr profile = w.ChunkedWriteFunc().CopyData(); + + JSONOutputCheck(profile.get(), [](const Json::Value&) {}); + + // Stop the profiler and call profiler_stream_json_for_this_process on a + // background thread. + NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody"_ns, + thread, + NS_NewRunnableFunction( + "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody", + [&]() { + profiler_stop(); + ASSERT_TRUE(::do_profiler_stream_json_for_this_process( + w, /* double aSinceTime */ 0.0, + /* bool aIsShuttingDown */ false, + /* ProfilerCodeAddressService* aService */ nullptr, + mozilla::ProgressLogger{}) + .isErr()); + })); + thread->Shutdown(); + + // Call profiler_stream_json_for_this_process on the main thread. 
+ ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr()); +} + +TEST(GeckoProfiler, ProfilingStack) +{ + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + + AUTO_PROFILER_LABEL("A::B", OTHER); + + UniqueFreePtr dynamic(strdup("dynamic")); + { + AUTO_PROFILER_LABEL_DYNAMIC_CSTR("A::C", JS, dynamic.get()); + AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING("A::C2", JS, + nsDependentCString(dynamic.get())); + AUTO_PROFILER_LABEL_DYNAMIC_LOSSY_NSSTRING( + "A::C3", JS, NS_ConvertUTF8toUTF16(dynamic.get())); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0); + + ASSERT_TRUE(profiler_get_backtrace()); + } + + AutoProfilerLabel label1("A", nullptr, JS::ProfilingCategoryPair::DOM); + AutoProfilerLabel label2("A", dynamic.get(), + JS::ProfilingCategoryPair::NETWORK); + ASSERT_TRUE(profiler_get_backtrace()); + + profiler_stop(); + + ASSERT_TRUE(!profiler_get_profile()); +} + +TEST(GeckoProfiler, Bug1355807) +{ + uint32_t features = ProfilerFeature::JS; + const char* manyThreadsFilter[] = {""}; + const char* fewThreadsFilter[] = {"GeckoMain"}; + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + manyThreadsFilter, MOZ_ARRAY_LENGTH(manyThreadsFilter), 0); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + fewThreadsFilter, MOZ_ARRAY_LENGTH(fewThreadsFilter), 0); + + // In bug 1355807 this caused an assertion failure in StopJSSampling(). 
+ profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + fewThreadsFilter, MOZ_ARRAY_LENGTH(fewThreadsFilter), 0); + + profiler_stop(); +} + +// Stack collector used by DoSuspendAndSample(): it only counts how many times +// each callback is invoked, so the test can check that sampling happened. +class GTestStackCollector final : public ProfilerStackCollector { + public: + GTestStackCollector() : mSetIsMainThread(0), mFrames(0) {} + + // `override` makes the compiler verify that these really replace the + // ProfilerStackCollector callbacks. + void SetIsMainThread() override { mSetIsMainThread++; } + + void CollectNativeLeafAddr(void* aAddr) override { mFrames++; } + void CollectJitReturnAddr(void* aAddr) override { mFrames++; } + void CollectWasmFrame(const char* aLabel) override { mFrames++; } + void CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) override { + mFrames++; + } + + int mSetIsMainThread; + int mFrames; +}; + +void DoSuspendAndSample(ProfilerThreadId aTidToSample, + nsIThread* aSamplingThread) { + NS_DispatchAndSpinEventLoopUntilComplete( + "GeckoProfiler_SuspendAndSample_Test::TestBody"_ns, aSamplingThread, + NS_NewRunnableFunction( + "GeckoProfiler_SuspendAndSample_Test::TestBody", [&]() { + uint32_t features = ProfilerFeature::CPUUtilization; + GTestStackCollector collector; + profiler_suspend_and_sample_thread(aTidToSample, features, + collector, + /* sampleNative = */ true); + + ASSERT_TRUE(collector.mSetIsMainThread == + (aTidToSample == profiler_main_thread_id())); + ASSERT_TRUE(collector.mFrames > 0); + })); +} + +TEST(GeckoProfiler, SuspendAndSample) +{ + nsCOMPtr thread; + nsresult rv = NS_NewNamedThread("GeckoProfGTest", getter_AddRefs(thread)); + ASSERT_NS_SUCCEEDED(rv); + + ProfilerThreadId tid = profiler_current_thread_id(); + + ASSERT_TRUE(!profiler_is_active()); + + // Suspend and sample while the profiler is inactive. 
+ DoSuspendAndSample(tid, thread); + + DoSuspendAndSample(ProfilerThreadId{}, thread); + + uint32_t features = ProfilerFeature::JS; + const char* filters[] = {"GeckoMain", "Compositor"}; + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + ASSERT_TRUE(profiler_is_active()); + + // Suspend and sample while the profiler is active. + DoSuspendAndSample(tid, thread); + + DoSuspendAndSample(ProfilerThreadId{}, thread); + + profiler_stop(); + + ASSERT_TRUE(!profiler_is_active()); +} + +TEST(GeckoProfiler, PostSamplingCallback) +{ + const char* filters[] = {"GeckoMain"}; + + ASSERT_TRUE(!profiler_is_active()); + ASSERT_TRUE(!profiler_callback_after_sampling( + [&](SamplingState) { ASSERT_TRUE(false); })); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters), + 0); + { + // Stack sampling -> This label should appear at least once. + AUTO_PROFILER_LABEL("PostSamplingCallback completed", OTHER); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + } + UniquePtr profileCompleted = profiler_get_profile(); + JSONOutputCheck(profileCompleted.get(), [](const Json::Value& aRoot) { + GET_JSON(threads, aRoot["threads"], Array); + { + GET_JSON(thread0, threads[0], Object); + { + EXPECT_JSON_ARRAY_CONTAINS(thread0["stringTable"], String, + "PostSamplingCallback completed"); + } + } + }); + + profiler_pause(); + { + // Paused -> This label should not appear. + AUTO_PROFILER_LABEL("PostSamplingCallback paused", OTHER); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingPaused); + } + UniquePtr profilePaused = profiler_get_profile(); + JSONOutputCheck(profilePaused.get(), [](const Json::Value& aRoot) {}); + // This string shouldn't appear *anywhere* in the profile. 
+ ASSERT_FALSE(strstr(profilePaused.get(), "PostSamplingCallback paused")); + + profiler_resume(); + { + // Stack sampling -> This label should appear at least once. + AUTO_PROFILER_LABEL("PostSamplingCallback resumed", OTHER); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + } + UniquePtr profileResumed = profiler_get_profile(); + JSONOutputCheck(profileResumed.get(), [](const Json::Value& aRoot) { + GET_JSON(threads, aRoot["threads"], Array); + { + GET_JSON(thread0, threads[0], Object); + { + EXPECT_JSON_ARRAY_CONTAINS(thread0["stringTable"], String, + "PostSamplingCallback resumed"); + } + } + }); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk | ProfilerFeature::NoStackSampling, + filters, MOZ_ARRAY_LENGTH(filters), 0); + { + // No stack sampling -> This label should not appear. + AUTO_PROFILER_LABEL("PostSamplingCallback completed (no stacks)", OTHER); + ASSERT_EQ(WaitForSamplingState(), SamplingState::NoStackSamplingCompleted); + } + UniquePtr profileNoStacks = profiler_get_profile(); + JSONOutputCheck(profileNoStacks.get(), [](const Json::Value& aRoot) {}); + // This string shouldn't appear *anywhere* in the profile. + ASSERT_FALSE(strstr(profileNoStacks.get(), + "PostSamplingCallback completed (no stacks)")); + + // Note: There is no non-racy way to test for SamplingState::JustStopped, as + // it would require coordination between `profiler_stop()` and another thread + // doing `profiler_callback_after_sampling()` at just the right moment. 
+ + profiler_stop(); + ASSERT_TRUE(!profiler_is_active()); + ASSERT_TRUE(!profiler_callback_after_sampling( + [&](SamplingState) { ASSERT_TRUE(false); })); +} + +TEST(GeckoProfiler, ProfilingStateCallback) +{ + const char* filters[] = {"GeckoMain"}; + + ASSERT_TRUE(!profiler_is_active()); + + struct ProfilingStateAndId { + ProfilingState mProfilingState; + int mId; + }; + DataMutex> states{"Profiling states"}; + auto CreateCallback = [&states](int id) { + return [id, &states](ProfilingState aProfilingState) { + auto lockedStates = states.Lock(); + ASSERT_TRUE( + lockedStates->append(ProfilingStateAndId{aProfilingState, id})); + }; + }; + auto CheckStatesIsEmpty = [&states]() { + auto lockedStates = states.Lock(); + EXPECT_TRUE(lockedStates->empty()); + }; + auto CheckStatesOnlyContains = [&states](ProfilingState aProfilingState, + int aId) { + auto lockedStates = states.Lock(); + EXPECT_EQ(lockedStates->length(), 1u); + if (lockedStates->length() >= 1u) { + EXPECT_EQ((*lockedStates)[0].mProfilingState, aProfilingState); + EXPECT_EQ((*lockedStates)[0].mId, aId); + } + lockedStates->clear(); + }; + + profiler_add_state_change_callback(AllProfilingStates(), CreateCallback(1), + 1); + // This is in case of error, and it also exercises the (allowed) removal of + // unknown callback ids. + auto cleanup1 = mozilla::MakeScopeExit( + []() { profiler_remove_state_change_callback(1); }); + CheckStatesIsEmpty(); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters), + 0); + + CheckStatesOnlyContains(ProfilingState::Started, 1); + + profiler_add_state_change_callback(AllProfilingStates(), CreateCallback(2), + 2); + // This is in case of error, and it also exercises the (allowed) removal of + // unknown callback ids. 
+ auto cleanup2 = mozilla::MakeScopeExit( + []() { profiler_remove_state_change_callback(2); }); + CheckStatesOnlyContains(ProfilingState::AlreadyActive, 2); + + profiler_remove_state_change_callback(2); + CheckStatesOnlyContains(ProfilingState::RemovingCallback, 2); + // Note: The actual removal is effectively tested below, by not seeing any + // more invocations of the 2nd callback. + + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + UniquePtr profileCompleted = profiler_get_profile(); + CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1); + JSONOutputCheck(profileCompleted.get(), [](const Json::Value& aRoot) {}); + + profiler_pause(); + CheckStatesOnlyContains(ProfilingState::Pausing, 1); + UniquePtr profilePaused = profiler_get_profile(); + CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1); + JSONOutputCheck(profilePaused.get(), [](const Json::Value& aRoot) {}); + + profiler_resume(); + CheckStatesOnlyContains(ProfilingState::Resumed, 1); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + UniquePtr profileResumed = profiler_get_profile(); + CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1); + JSONOutputCheck(profileResumed.get(), [](const Json::Value& aRoot) {}); + + // This effectively stops the profiler before restarting it, but + // ProfilingState::Stopping is not notified. See `profiler_start` for details. 
+ profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk | ProfilerFeature::NoStackSampling, + filters, MOZ_ARRAY_LENGTH(filters), 0); + CheckStatesOnlyContains(ProfilingState::Started, 1); + ASSERT_EQ(WaitForSamplingState(), SamplingState::NoStackSamplingCompleted); + UniquePtr profileNoStacks = profiler_get_profile(); + CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1); + JSONOutputCheck(profileNoStacks.get(), [](const Json::Value& aRoot) {}); + + profiler_stop(); + CheckStatesOnlyContains(ProfilingState::Stopping, 1); + ASSERT_TRUE(!profiler_is_active()); + + profiler_remove_state_change_callback(1); + CheckStatesOnlyContains(ProfilingState::RemovingCallback, 1); + + // Note: ProfilingState::ShuttingDown cannot be tested here, and the profiler + // can only be shut down once per process. +} + +TEST(GeckoProfiler, BaseProfilerHandOff) +{ + const char* filters[] = {"GeckoMain"}; + + ASSERT_TRUE(!baseprofiler::profiler_is_active()); + ASSERT_TRUE(!profiler_is_active()); + + BASE_PROFILER_MARKER_UNTYPED("Base marker before base profiler", OTHER, {}); + PROFILER_MARKER_UNTYPED("Gecko marker before base profiler", OTHER, {}); + + // Start the Base Profiler. + baseprofiler::profiler_start( + PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters)); + + ASSERT_TRUE(baseprofiler::profiler_is_active()); + ASSERT_TRUE(!profiler_is_active()); + + // Add at least a marker, which should go straight into the buffer. + Maybe info0 = + baseprofiler::profiler_get_buffer_info(); + BASE_PROFILER_MARKER_UNTYPED("Base marker during base profiler", OTHER, {}); + Maybe info1 = + baseprofiler::profiler_get_buffer_info(); + ASSERT_GT(info1->mRangeEnd, info0->mRangeEnd); + + PROFILER_MARKER_UNTYPED("Gecko marker during base profiler", OTHER, {}); + + // Start the Gecko Profiler, which should grab the Base Profiler profile and + // stop it. 
+ profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters), + 0); + + ASSERT_TRUE(!baseprofiler::profiler_is_active()); + ASSERT_TRUE(profiler_is_active()); + + BASE_PROFILER_MARKER_UNTYPED("Base marker during gecko profiler", OTHER, {}); + PROFILER_MARKER_UNTYPED("Gecko marker during gecko profiler", OTHER, {}); + + // Write some Gecko Profiler samples. + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + + // Check that the Gecko Profiler profile contains at least the Base Profiler + // main thread samples. + UniquePtr profile = profiler_get_profile(); + + profiler_stop(); + ASSERT_TRUE(!profiler_is_active()); + + BASE_PROFILER_MARKER_UNTYPED("Base marker after gecko profiler", OTHER, {}); + PROFILER_MARKER_UNTYPED("Gecko marker after gecko profiler", OTHER, {}); + + JSONOutputCheck(profile.get(), [](const Json::Value& aRoot) { + GET_JSON(threads, aRoot["threads"], Array); + { + bool found = false; + for (const Json::Value& thread : threads) { + ASSERT_TRUE(thread.isObject()); + GET_JSON(name, thread["name"], String); + if (name.asString() == "GeckoMain") { + found = true; + EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String, + "Base marker before base profiler"); + EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String, + "Gecko marker before base profiler"); + EXPECT_JSON_ARRAY_CONTAINS(thread["stringTable"], String, + "Base marker during base profiler"); + EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String, + "Gecko marker during base profiler"); + EXPECT_JSON_ARRAY_CONTAINS(thread["stringTable"], String, + "Base marker during gecko profiler"); + EXPECT_JSON_ARRAY_CONTAINS(thread["stringTable"], String, + "Gecko marker during gecko profiler"); + EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String, + "Base marker after gecko profiler"); + EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String, + "Gecko marker after gecko profiler"); + break; + } 
+ } + EXPECT_TRUE(found); + } + }); +} + +static std::string_view GetFeatureName(uint32_t feature) { + switch (feature) { +# define FEATURE_NAME(n_, str_, Name_, desc_) \ + case ProfilerFeature::Name_: \ + return str_; + + PROFILER_FOR_EACH_FEATURE(FEATURE_NAME) + +# undef FEATURE_NAME + + default: + return "?"; + } +} + +TEST(GeckoProfiler, FeatureCombinations) +{ + // Bug 1845606 + #ifdef XP_WIN + if (!IsWin8OrLater()) { + return; + } + #endif + + const char* filters[] = {"*"}; + + // List of features to test. Every combination of up to 3 of them will be + // tested, so be careful not to add too many to keep the test run at a + // reasonable time. + uint32_t featureList[] = {ProfilerFeature::JS, + ProfilerFeature::Screenshots, + ProfilerFeature::StackWalk, + ProfilerFeature::NoStackSampling, + ProfilerFeature::NativeAllocations, + ProfilerFeature::CPUUtilization, + ProfilerFeature::CPUAllThreads, + ProfilerFeature::SamplingAllThreads, + ProfilerFeature::MarkersAllThreads, + ProfilerFeature::UnregisteredThreads}; + constexpr uint32_t featureCount = uint32_t(MOZ_ARRAY_LENGTH(featureList)); + + auto testFeatures = [&](uint32_t features, + const std::string& featuresString) { + SCOPED_TRACE(featuresString.c_str()); + + ASSERT_TRUE(!profiler_is_active()); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0); + + ASSERT_TRUE(profiler_is_active()); + + // Write some Gecko Profiler samples. + EXPECT_EQ(WaitForSamplingState(), + (((features & ProfilerFeature::NoStackSampling) != 0) && + ((features & (ProfilerFeature::CPUUtilization | + ProfilerFeature::CPUAllThreads)) == 0)) + ? SamplingState::NoStackSamplingCompleted + : SamplingState::SamplingCompleted); + + // Check that the profile looks valid. Note that we don't test feature- + // specific changes. 
+ UniquePtr profile = profiler_get_profile(); + JSONOutputCheck(profile.get(), [](const Json::Value& aRoot) {}); + + profiler_stop(); + ASSERT_TRUE(!profiler_is_active()); + }; + + testFeatures(0, "Features: (none)"); + + for (uint32_t f1 = 0u; f1 < featureCount; ++f1) { + const uint32_t features1 = featureList[f1]; + std::string features1String = "Features: "; + features1String += GetFeatureName(featureList[f1]); + + testFeatures(features1, features1String); + + for (uint32_t f2 = f1 + 1u; f2 < featureCount; ++f2) { + const uint32_t features12 = f1 | featureList[f2]; + std::string features12String = features1String + " "; + features12String += GetFeatureName(featureList[f2]); + + testFeatures(features12, features12String); + + for (uint32_t f3 = f2 + 1u; f3 < featureCount; ++f3) { + const uint32_t features123 = features12 | featureList[f3]; + std::string features123String = features12String + " "; + features123String += GetFeatureName(featureList[f3]); + + testFeatures(features123, features123String); + } + } + } +} + +static void CountCPUDeltas(const Json::Value& aThread, size_t& aOutSamplings, + uint64_t& aOutCPUDeltaSum) { + GET_JSON(samples, aThread["samples"], Object); + { + Json::ArrayIndex threadCPUDeltaIndex = 0; + GET_JSON(schema, samples["schema"], Object); + { + GET_JSON(jsonThreadCPUDeltaIndex, schema["threadCPUDelta"], UInt); + threadCPUDeltaIndex = jsonThreadCPUDeltaIndex.asUInt(); + } + + aOutSamplings = 0; + aOutCPUDeltaSum = 0; + GET_JSON(data, samples["data"], Array); + aOutSamplings = data.size(); + for (const Json::Value& sample : data) { + ASSERT_TRUE(sample.isArray()); + if (sample.isValidIndex(threadCPUDeltaIndex)) { + if (!sample[threadCPUDeltaIndex].isNull()) { + GET_JSON(cpuDelta, sample[threadCPUDeltaIndex], UInt64); + aOutCPUDeltaSum += uint64_t(cpuDelta.asUInt64()); + } + } + } + } +} + +TEST(GeckoProfiler, CPUUsage) +{ + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the 
main thread"; + + const char* filters[] = {"GeckoMain", "Idle test", "Busy test"}; + + enum class TestThreadsState { + // Initial state, while constructing and starting the idle thread. + STARTING, + // Set by the idle thread just before running its main mostly-idle loop. + RUNNING1, + RUNNING2, + // Set by the main thread when it wants the idle thread to stop. + STOPPING + }; + Atomic testThreadsState{TestThreadsState::STARTING}; + + std::thread idle([&]() { + AUTO_PROFILER_REGISTER_THREAD("Idle test"); + // Add a label to ensure that we have a non-empty stack, even if native + // stack-walking is not available. + AUTO_PROFILER_LABEL("Idle test", PROFILER); + ASSERT_TRUE(testThreadsState.compareExchange(TestThreadsState::STARTING, + TestThreadsState::RUNNING1) || + testThreadsState.compareExchange(TestThreadsState::RUNNING1, + TestThreadsState::RUNNING2)); + + while (testThreadsState != TestThreadsState::STOPPING) { + // Sleep for multiple profiler intervals, so the profiler should have + // samples with zero CPU utilization. + PR_Sleep(PR_MillisecondsToInterval(PROFILER_DEFAULT_INTERVAL * 10)); + } + }); + + std::thread busy([&]() { + AUTO_PROFILER_REGISTER_THREAD("Busy test"); + // Add a label to ensure that we have a non-empty stack, even if native + // stack-walking is not available. + AUTO_PROFILER_LABEL("Busy test", PROFILER); + ASSERT_TRUE(testThreadsState.compareExchange(TestThreadsState::STARTING, + TestThreadsState::RUNNING1) || + testThreadsState.compareExchange(TestThreadsState::RUNNING1, + TestThreadsState::RUNNING2)); + + while (testThreadsState != TestThreadsState::STOPPING) { + // Stay busy! + } + }); + + // Wait for idle thread to start running its main loop. + while (testThreadsState != TestThreadsState::RUNNING2) { + PR_Sleep(PR_MillisecondsToInterval(1)); + } + + // We want to ensure that CPU usage numbers are present whether or not we are + // collecting stack samples. 
+ static constexpr bool scTestsWithOrWithoutStackSampling[] = {false, true}; + for (const bool testWithNoStackSampling : scTestsWithOrWithoutStackSampling) { + ASSERT_TRUE(!profiler_is_active()); + ASSERT_TRUE(!profiler_callback_after_sampling( + [&](SamplingState) { ASSERT_TRUE(false); })); + + profiler_start( + PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization | + (testWithNoStackSampling ? ProfilerFeature::NoStackSampling : 0), + filters, MOZ_ARRAY_LENGTH(filters), 0); + // Grab a few samples, each with a different label on the stack. +# define SAMPLE_LABEL_PREFIX "CPUUsage sample label " + static constexpr const char* scSampleLabels[] = { + SAMPLE_LABEL_PREFIX "0", SAMPLE_LABEL_PREFIX "1", + SAMPLE_LABEL_PREFIX "2", SAMPLE_LABEL_PREFIX "3", + SAMPLE_LABEL_PREFIX "4", SAMPLE_LABEL_PREFIX "5", + SAMPLE_LABEL_PREFIX "6", SAMPLE_LABEL_PREFIX "7", + SAMPLE_LABEL_PREFIX "8", SAMPLE_LABEL_PREFIX "9"}; + static constexpr size_t scSampleLabelCount = + (sizeof(scSampleLabels) / sizeof(scSampleLabels[0])); + // We'll do two samplings for each label. + static constexpr size_t scMinSamplings = scSampleLabelCount * 2; + + for (const char* sampleLabel : scSampleLabels) { + AUTO_PROFILER_LABEL(sampleLabel, OTHER); + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + // Note: There could have been a delay before this label above, where the + // profiler could have sampled the stack and missed the label. By forcing + // another sampling now, the label is guaranteed to be present. + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + } + + UniquePtr profile = profiler_get_profile(); + + if (testWithNoStackSampling) { + // If we are testing nostacksampling, we shouldn't find this label prefix + // in the profile. + EXPECT_FALSE(strstr(profile.get(), SAMPLE_LABEL_PREFIX)); + } else { + // In normal sampling mode, we should find all labels. 
+ for (const char* sampleLabel : scSampleLabels) { + EXPECT_TRUE(strstr(profile.get(), sampleLabel)); + } + } + + JSONOutputCheck(profile.get(), [testWithNoStackSampling]( + const Json::Value& aRoot) { + // Check that the "cpu" feature is present. + GET_JSON(meta, aRoot["meta"], Object); + { + GET_JSON(configuration, meta["configuration"], Object); + { + GET_JSON(features, configuration["features"], Array); + { EXPECT_JSON_ARRAY_CONTAINS(features, String, "cpu"); } + } + } + + { + GET_JSON(sampleUnits, meta["sampleUnits"], Object); + { + EXPECT_EQ_JSON(sampleUnits["time"], String, "ms"); + EXPECT_EQ_JSON(sampleUnits["eventDelay"], String, "ms"); +# if defined(GP_OS_windows) || defined(GP_OS_darwin) || \ + defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // Note: The exact string is not important here. + EXPECT_TRUE(sampleUnits["threadCPUDelta"].isString()) + << "There should be a sampleUnits.threadCPUDelta on this " + "platform"; +# else + EXPECT_FALSE(sampleUnits.isMember("threadCPUDelta")) + << "Unexpected sampleUnits.threadCPUDelta on this platform";; +# endif + } + } + + bool foundMain = false; + bool foundIdle = false; + uint64_t idleThreadCPUDeltaSum = 0u; + bool foundBusy = false; + uint64_t busyThreadCPUDeltaSum = 0u; + + // Check that the sample schema contains "threadCPUDelta". 
+ GET_JSON(threads, aRoot["threads"], Array); + for (const Json::Value& thread : threads) { + ASSERT_TRUE(thread.isObject()); + GET_JSON(name, thread["name"], String); + if (name.asString() == "GeckoMain") { + foundMain = true; + GET_JSON(samples, thread["samples"], Object); + { + Json::ArrayIndex stackIndex = 0; + Json::ArrayIndex threadCPUDeltaIndex = 0; + GET_JSON(schema, samples["schema"], Object); + { + GET_JSON(jsonStackIndex, schema["stack"], UInt); + stackIndex = jsonStackIndex.asUInt(); + GET_JSON(jsonThreadCPUDeltaIndex, schema["threadCPUDelta"], UInt); + threadCPUDeltaIndex = jsonThreadCPUDeltaIndex.asUInt(); + } + + std::set stackLeaves; // To count distinct leaves. + unsigned threadCPUDeltaCount = 0; + GET_JSON(data, samples["data"], Array); + if (testWithNoStackSampling) { + // When not sampling stacks, the first sampling loop will have no + // running times, so it won't output anything. + EXPECT_GE(data.size(), scMinSamplings - 1); + } else { + EXPECT_GE(data.size(), scMinSamplings); + } + for (const Json::Value& sample : data) { + ASSERT_TRUE(sample.isArray()); + if (sample.isValidIndex(stackIndex)) { + if (!sample[stackIndex].isNull()) { + GET_JSON(stack, sample[stackIndex], UInt64); + stackLeaves.insert(stack.asUInt64()); + } + } + if (sample.isValidIndex(threadCPUDeltaIndex)) { + if (!sample[threadCPUDeltaIndex].isNull()) { + EXPECT_TRUE(sample[threadCPUDeltaIndex].isUInt64()); + ++threadCPUDeltaCount; + } + } + } + + if (testWithNoStackSampling) { + // in nostacksampling mode, there should only be one kind of stack + // leaf (the root). + EXPECT_EQ(stackLeaves.size(), 1u); + } else { + // in normal sampling mode, there should be at least one kind of + // stack leaf for each distinct label. 
+ EXPECT_GE(stackLeaves.size(), scSampleLabelCount); + } + +# if defined(GP_OS_windows) || defined(GP_OS_darwin) || \ + defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + EXPECT_GE(threadCPUDeltaCount, data.size() - 1u) + << "There should be 'threadCPUDelta' values in all but 1 " + "samples"; +# else + // All "threadCPUDelta" data should be absent or null on unsupported + // platforms. + EXPECT_EQ(threadCPUDeltaCount, 0u); +# endif + } + } else if (name.asString() == "Idle test") { + foundIdle = true; + size_t samplings; + CountCPUDeltas(thread, samplings, idleThreadCPUDeltaSum); + if (testWithNoStackSampling) { + // When not sampling stacks, the first sampling loop will have no + // running times, so it won't output anything. + EXPECT_GE(samplings, scMinSamplings - 1); + } else { + EXPECT_GE(samplings, scMinSamplings); + } +# if !(defined(GP_OS_windows) || defined(GP_OS_darwin) || \ + defined(GP_OS_linux) || defined(GP_OS_android) || \ + defined(GP_OS_freebsd)) + // All "threadCPUDelta" data should be absent or null on unsupported + // platforms. + EXPECT_EQ(idleThreadCPUDeltaSum, 0u); +# endif + } else if (name.asString() == "Busy test") { + foundBusy = true; + size_t samplings; + CountCPUDeltas(thread, samplings, busyThreadCPUDeltaSum); + if (testWithNoStackSampling) { + // When not sampling stacks, the first sampling loop will have no + // running times, so it won't output anything. + EXPECT_GE(samplings, scMinSamplings - 1); + } else { + EXPECT_GE(samplings, scMinSamplings); + } +# if !(defined(GP_OS_windows) || defined(GP_OS_darwin) || \ + defined(GP_OS_linux) || defined(GP_OS_android) || \ + defined(GP_OS_freebsd)) + // All "threadCPUDelta" data should be absent or null on unsupported + // platforms. 
+ EXPECT_EQ(busyThreadCPUDeltaSum, 0u); +# endif + } + } + + EXPECT_TRUE(foundMain); + EXPECT_TRUE(foundIdle); + EXPECT_TRUE(foundBusy); + EXPECT_LE(idleThreadCPUDeltaSum, busyThreadCPUDeltaSum); + }); + + // Note: There is no non-racy way to test for SamplingState::JustStopped, as + // it would require coordination between `profiler_stop()` and another + // thread doing `profiler_callback_after_sampling()` at just the right + // moment. + + profiler_stop(); + ASSERT_TRUE(!profiler_is_active()); + ASSERT_TRUE(!profiler_callback_after_sampling( + [&](SamplingState) { ASSERT_TRUE(false); })); + } + + testThreadsState = TestThreadsState::STOPPING; + busy.join(); + idle.join(); +} + +TEST(GeckoProfiler, AllThreads) +{ + // Bug 1845606 + #ifdef XP_WIN + if (!IsWin8OrLater()) { + return; + } + #endif + + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the main thread"; + + ASSERT_EQ(static_cast(ThreadProfilingFeatures::Any), 1u + 2u + 4u) + << "This test assumes that there are 3 binary choices 1+2+4; " + "Is this test up to date?"; + + for (uint32_t threadFeaturesBinary = 0u; + threadFeaturesBinary <= + static_cast(ThreadProfilingFeatures::Any); + ++threadFeaturesBinary) { + ThreadProfilingFeatures threadFeatures = + static_cast(threadFeaturesBinary); + const bool threadCPU = DoFeaturesIntersect( + threadFeatures, ThreadProfilingFeatures::CPUUtilization); + const bool threadSampling = + DoFeaturesIntersect(threadFeatures, ThreadProfilingFeatures::Sampling); + const bool threadMarkers = + DoFeaturesIntersect(threadFeatures, ThreadProfilingFeatures::Markers); + + ASSERT_TRUE(!profiler_is_active()); + + uint32_t features = ProfilerFeature::StackWalk; + std::string featuresString = "Features: StackWalk Threads"; + if (threadCPU) { + features |= ProfilerFeature::CPUAllThreads; + featuresString += " CPUAllThreads"; + } + if (threadSampling) { + features |= ProfilerFeature::SamplingAllThreads; + featuresString += " 
SamplingAllThreads"; + } + if (threadMarkers) { + features |= ProfilerFeature::MarkersAllThreads; + featuresString += " MarkersAllThreads"; + } + + SCOPED_TRACE(featuresString.c_str()); + + const char* filters[] = {"GeckoMain", "Selected"}; + + EXPECT_FALSE(profiler_thread_is_being_profiled( + ThreadProfilingFeatures::CPUUtilization)); + EXPECT_FALSE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling)); + EXPECT_FALSE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers)); + EXPECT_FALSE(profiler_thread_is_being_profiled_for_markers()); + + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + features, filters, MOZ_ARRAY_LENGTH(filters), 0); + + EXPECT_TRUE(profiler_thread_is_being_profiled( + ThreadProfilingFeatures::CPUUtilization)); + EXPECT_TRUE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling)); + EXPECT_TRUE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers)); + EXPECT_TRUE(profiler_thread_is_being_profiled_for_markers()); + + // This will signal all threads to stop spinning. + Atomic stopThreads{false}; + + Atomic selectedThreadSpins{0}; + std::thread selectedThread([&]() { + AUTO_PROFILER_REGISTER_THREAD("Selected test thread"); + // Add a label to ensure that we have a non-empty stack, even if native + // stack-walking is not available. 
+ AUTO_PROFILER_LABEL("Selected test thread", PROFILER); + EXPECT_TRUE(profiler_thread_is_being_profiled( + ThreadProfilingFeatures::CPUUtilization)); + EXPECT_TRUE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling)); + EXPECT_TRUE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers)); + EXPECT_TRUE(profiler_thread_is_being_profiled_for_markers()); + while (!stopThreads) { + PROFILER_MARKER_UNTYPED("Spinning Selected!", PROFILER); + ++selectedThreadSpins; + PR_Sleep(PR_MillisecondsToInterval(1)); + } + }); + + Atomic unselectedThreadSpins{0}; + std::thread unselectedThread([&]() { + AUTO_PROFILER_REGISTER_THREAD("Registered test thread"); + // Add a label to ensure that we have a non-empty stack, even if native + // stack-walking is not available. + AUTO_PROFILER_LABEL("Registered test thread", PROFILER); + // This thread is *not* selected for full profiling, but it may still be + // profiled depending on the -allthreads features. + EXPECT_EQ(profiler_thread_is_being_profiled( + ThreadProfilingFeatures::CPUUtilization), + threadCPU); + EXPECT_EQ( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling), + threadSampling); + EXPECT_EQ( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers), + threadMarkers); + EXPECT_EQ(profiler_thread_is_being_profiled_for_markers(), threadMarkers); + while (!stopThreads) { + PROFILER_MARKER_UNTYPED("Spinning Registered!", PROFILER); + ++unselectedThreadSpins; + PR_Sleep(PR_MillisecondsToInterval(1)); + } + }); + + Atomic unregisteredThreadSpins{0}; + std::thread unregisteredThread([&]() { + // No `AUTO_PROFILER_REGISTER_THREAD` here. 
+ EXPECT_FALSE(profiler_thread_is_being_profiled( + ThreadProfilingFeatures::CPUUtilization)); + EXPECT_FALSE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling)); + EXPECT_FALSE( + profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers)); + EXPECT_FALSE(profiler_thread_is_being_profiled_for_markers()); + while (!stopThreads) { + PROFILER_MARKER_UNTYPED("Spinning Unregistered!", PROFILER); + ++unregisteredThreadSpins; + PR_Sleep(PR_MillisecondsToInterval(1)); + } + }); + + // Wait for all threads to have started at least one spin. + while (selectedThreadSpins == 0 || unselectedThreadSpins == 0 || + unregisteredThreadSpins == 0) { + PR_Sleep(PR_MillisecondsToInterval(1)); + } + + // Wait until the sampler has done at least one loop. + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + + // Restart the spin counts, and ensure each threads will do at least one + // more spin each. Since spins are increased after PROFILER_MARKER calls, in + // the worst case, each thread will have attempted to record at least one + // marker. 
+ selectedThreadSpins = 0; + unselectedThreadSpins = 0; + unregisteredThreadSpins = 0; + while (selectedThreadSpins < 1 || unselectedThreadSpins < 1 || + unregisteredThreadSpins < 1) { + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + } + + profiler_pause(); + UniquePtr profile = profiler_get_profile(); + + profiler_stop(); + stopThreads = true; + unregisteredThread.join(); + unselectedThread.join(); + selectedThread.join(); + + JSONOutputCheck(profile.get(), [&](const Json::Value& aRoot) { + GET_JSON(threads, aRoot["threads"], Array); + int foundMain = 0; + int foundSelected = 0; + int foundSelectedMarker = 0; + int foundUnselected = 0; + int foundUnselectedMarker = 0; + for (const Json::Value& thread : threads) { + ASSERT_TRUE(thread.isObject()); + GET_JSON(stringTable, thread["stringTable"], Array); + GET_JSON(name, thread["name"], String); + if (name.asString() == "GeckoMain") { + ++foundMain; + // Don't check the main thread further in this test. + + } else if (name.asString() == "Selected test thread") { + ++foundSelected; + + GET_JSON(samples, thread["samples"], Object); + GET_JSON(samplesData, samples["data"], Array); + EXPECT_GT(samplesData.size(), 0u); + + GET_JSON(markers, thread["markers"], Object); + GET_JSON(markersData, markers["data"], Array); + for (const Json::Value& marker : markersData) { + const unsigned int NAME = 0u; + ASSERT_TRUE(marker[NAME].isUInt()); // name id + GET_JSON(name, stringTable[marker[NAME].asUInt()], String); + if (name == "Spinning Selected!") { + ++foundSelectedMarker; + } + } + } else if (name.asString() == "Registered test thread") { + ++foundUnselected; + + GET_JSON(samples, thread["samples"], Object); + GET_JSON(samplesData, samples["data"], Array); + if (threadCPU || threadSampling) { + EXPECT_GT(samplesData.size(), 0u); + } else { + EXPECT_EQ(samplesData.size(), 0u); + } + + GET_JSON(markers, thread["markers"], Object); + GET_JSON(markersData, markers["data"], Array); + for (const Json::Value& 
marker : markersData) { + const unsigned int NAME = 0u; + ASSERT_TRUE(marker[NAME].isUInt()); // name id + GET_JSON(name, stringTable[marker[NAME].asUInt()], String); + if (name == "Spinning Registered!") { + ++foundUnselectedMarker; + } + } + + } else { + EXPECT_STRNE(name.asString().c_str(), + "Unregistered test thread label"); + } + } + EXPECT_EQ(foundMain, 1); + EXPECT_EQ(foundSelected, 1); + EXPECT_GT(foundSelectedMarker, 0); + EXPECT_EQ(foundUnselected, + (threadCPU || threadSampling || threadMarkers) ? 1 : 0) + << "Unselected thread should only be present if at least one of the " + "allthreads feature is on"; + if (threadMarkers) { + EXPECT_GT(foundUnselectedMarker, 0); + } else { + EXPECT_EQ(foundUnselectedMarker, 0); + } + }); + } +} + +TEST(GeckoProfiler, FailureHandling) +{ + profiler_init_main_thread_id(); + ASSERT_TRUE(profiler_is_main_thread()) + << "This test assumes it runs on the main thread"; + + uint32_t features = ProfilerFeature::StackWalk; + const char* filters[] = {"GeckoMain"}; + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + + // User-defined marker type that generates a failure when streaming JSON. 
+ struct GtestFailingMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("markers-gtest-failing"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter) { + aWriter.SetFailure("boom!"); + } + static mozilla::MarkerSchema MarkerTypeDisplay() { + return mozilla::MarkerSchema::SpecialFrontendLocation{}; + } + }; + EXPECT_TRUE(profiler_add_marker("Gtest failing marker", + geckoprofiler::category::OTHER, {}, + GtestFailingMarker{})); + + ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted); + profiler_pause(); + + FailureLatchSource failureLatch; + SpliceableChunkedJSONWriter w{failureLatch}; + EXPECT_FALSE(w.Failed()); + ASSERT_FALSE(w.GetFailure()); + + w.Start(); + EXPECT_FALSE(w.Failed()); + ASSERT_FALSE(w.GetFailure()); + + // The marker will cause a failure during this function call. + EXPECT_FALSE(::profiler_stream_json_for_this_process(w).isOk()); + EXPECT_TRUE(w.Failed()); + ASSERT_TRUE(w.GetFailure()); + EXPECT_EQ(strcmp(w.GetFailure(), "boom!"), 0); + + // Already failed, check that we don't crash or reset the failure. + EXPECT_FALSE(::profiler_stream_json_for_this_process(w).isOk()); + EXPECT_TRUE(w.Failed()); + ASSERT_TRUE(w.GetFailure()); + EXPECT_EQ(strcmp(w.GetFailure(), "boom!"), 0); + + w.End(); + + profiler_stop(); + + EXPECT_TRUE(w.Failed()); + ASSERT_TRUE(w.GetFailure()); + EXPECT_EQ(strcmp(w.GetFailure(), "boom!"), 0); + + UniquePtr profile = w.ChunkedWriteFunc().CopyData(); + ASSERT_EQ(profile.get(), nullptr); +} + +TEST(GeckoProfiler, NoMarkerStacks) +{ + uint32_t features = ProfilerFeature::NoMarkerStacks; + const char* filters[] = {"GeckoMain"}; + + ASSERT_TRUE(!profiler_get_profile()); + + // Make sure that profiler_capture_backtrace returns nullptr when the profiler + // is not active. + ASSERT_TRUE(!profiler_capture_backtrace()); + + { + // Start the profiler without the NoMarkerStacks feature and make sure we + // capture stacks. 
+ profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, + /* features */ 0, filters, MOZ_ARRAY_LENGTH(filters), 0); + + ASSERT_TRUE(profiler_capture_backtrace()); + profiler_stop(); + } + + // Start the profiler with the NoMarkerStacks feature and make sure we + // don't capture stacks. + profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features, + filters, MOZ_ARRAY_LENGTH(filters), 0); + + // Make sure that the active features include the NoMarkerStacks feature. + mozilla::Maybe activeFeatures = profiler_features_if_active(); + ASSERT_TRUE(activeFeatures.isSome()); + ASSERT_TRUE(ProfilerFeature::HasNoMarkerStacks(*activeFeatures)); + + // Make sure we don't capture stacks. + ASSERT_TRUE(!profiler_capture_backtrace()); + + // Add a marker with a stack to test. + EXPECT_TRUE(profiler_add_marker( + "Text with stack", geckoprofiler::category::OTHER, MarkerStack::Capture(), + geckoprofiler::markers::TextMarker{}, "")); + + UniquePtr profile = profiler_get_profile(); + JSONOutputCheck(profile.get(), [&](const Json::Value& aRoot) { + // Check that the meta.configuration.features array contains + // "nomarkerstacks". + GET_JSON(meta, aRoot["meta"], Object); + { + GET_JSON(configuration, meta["configuration"], Object); + { + GET_JSON(features, configuration["features"], Array); + { + EXPECT_EQ(features.size(), 1u); + EXPECT_JSON_ARRAY_CONTAINS(features, String, "nomarkerstacks"); + } + } + } + + // Make sure that the marker we captured doesn't have a stack. 
+ GET_JSON(threads, aRoot["threads"], Array); + { + ASSERT_EQ(threads.size(), 1u); + GET_JSON(thread0, threads[0], Object); + { + GET_JSON(markers, thread0["markers"], Object); + { + GET_JSON(data, markers["data"], Array); + { + const unsigned int NAME = 0u; + const unsigned int PAYLOAD = 5u; + bool foundMarker = false; + GET_JSON(stringTable, thread0["stringTable"], Array); + + for (const Json::Value& marker : data) { + // Even though we only added one marker, some markers like + // NotifyObservers are being added as well. Let's iterate over + // them and make sure that we have the one we added explicitly and + // check its stack doesn't exist. + GET_JSON(name, stringTable[marker[NAME].asUInt()], String); + std::string nameString = name.asString(); + + if (nameString == "Text with stack") { + // Make sure that the marker doesn't have a stack. + foundMarker = true; + EXPECT_FALSE(marker[PAYLOAD].isNull()); + EXPECT_TRUE(marker[PAYLOAD]["stack"].isNull()); + } + } + + EXPECT_TRUE(foundMarker); + } + } + } + } + }); + + profiler_stop(); + + ASSERT_TRUE(!profiler_get_profile()); +} + +#endif // MOZ_GECKO_PROFILER diff --git a/tools/profiler/tests/gtest/LulTest.cpp b/tools/profiler/tests/gtest/LulTest.cpp new file mode 100644 index 0000000000..159a366567 --- /dev/null +++ b/tools/profiler/tests/gtest/LulTest.cpp @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "mozilla/Atomics.h" +#include "LulMain.h" +#include "GeckoProfiler.h" // for TracingKind +#include "platform-linux-lul.h" // for read_procmaps + +// Set this to 0 to make LUL be completely silent during tests. +// Set it to 1 to get logging output from LUL, presumably for +// the purpose of debugging it. 
+#define DEBUG_LUL_TEST 0 + +// LUL needs a callback for its logging sink. +static void gtest_logging_sink_for_LulIntegration(const char* str) { + if (DEBUG_LUL_TEST == 0) { + return; + } + // Ignore any trailing \n, since LOG will add one anyway. + size_t n = strlen(str); + if (n > 0 && str[n - 1] == '\n') { + // Print everything except the final '\n'. Passing the length as a + // precision avoids copying the string, so there is no allocation that + // could fail and nothing to free. + fprintf(stderr, "LUL-in-gtest: %.*s\n", static_cast<int>(n - 1), str); + } else { + fprintf(stderr, "LUL-in-gtest: %s\n", str); + } +} + +TEST(LulIntegration, unwind_consistency) +{ + // Set up LUL and get it to read unwind info for libxul.so, which is + // all we care about here, plus (incidentally) practically every + // other object in the process too. + lul::LUL* lul = new lul::LUL(gtest_logging_sink_for_LulIntegration); + read_procmaps(lul); + + // Run unwind tests and receive information about how many there + // were and how many were successful. + lul->EnableUnwinding(); + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, lul); + EXPECT_EQ(nTests, 6) << "Unexpected number of tests"; + EXPECT_EQ(nTestsPassed, nTests) << "Not all tests passed"; + + delete lul; +} diff --git a/tools/profiler/tests/gtest/LulTestDwarf.cpp b/tools/profiler/tests/gtest/LulTestDwarf.cpp new file mode 100644 index 0000000000..55373ec093 --- /dev/null +++ b/tools/profiler/tests/gtest/LulTestDwarf.cpp @@ -0,0 +1,2733 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "LulCommonExt.h" +#include "LulDwarfExt.h" +#include "LulDwarfInt.h" +#include "LulTestInfrastructure.h" + +using lul_test::CFISection; +using lul_test::test_assembler::kBigEndian; +using lul_test::test_assembler::kLittleEndian; +using lul_test::test_assembler::Label; +using testing::_; +using testing::InSequence; +using testing::Return; +using testing::Sequence; +using testing::Test; + +#define PERHAPS_WRITE_DEBUG_FRAME_FILE(name, section) /**/ +#define PERHAPS_WRITE_EH_FRAME_FILE(name, section) /**/ + +// Set this to 0 to make LUL be completely silent during tests. +// Set it to 1 to get logging output from LUL, presumably for +// the purpose of debugging it. +#define DEBUG_LUL_TEST_DWARF 0 + +// LUL needs a callback for its logging sink. +static void gtest_logging_sink_for_LulTestDwarf(const char* str) { + if (DEBUG_LUL_TEST_DWARF == 0) { + return; + } + // Ignore any trailing \n, since LOG will add one anyway. + size_t n = strlen(str); + if (n > 0 && str[n - 1] == '\n') { + // Print everything except the final '\n'. Passing the length as a + // precision avoids copying the string, so there is no allocation that + // could fail and nothing to free. + fprintf(stderr, "LUL-in-gtest: %.*s\n", static_cast<int>(n - 1), str); + } else { + fprintf(stderr, "LUL-in-gtest: %s\n", str); + } +} + +namespace lul { + +class MockCallFrameInfoHandler : public CallFrameInfo::Handler { + public: + MOCK_METHOD6(Entry, + bool(size_t offset, uint64 address, uint64 length, uint8 version, + const std::string& augmentation, unsigned return_address)); + MOCK_METHOD2(UndefinedRule, bool(uint64 address, int reg)); + MOCK_METHOD2(SameValueRule, bool(uint64 address, int reg)); + MOCK_METHOD4(OffsetRule, + bool(uint64 address, int reg, int base_register, long offset)); + MOCK_METHOD4(ValOffsetRule, + bool(uint64 address, int reg, int base_register, long offset)); + MOCK_METHOD3(RegisterRule, bool(uint64 address, int reg, int base_register)); + MOCK_METHOD3(ExpressionRule, + bool(uint64 address, int reg, const ImageSlice& expression)); + MOCK_METHOD3(ValExpressionRule, + 
bool(uint64 address, int reg, const ImageSlice& expression)); + MOCK_METHOD0(End, bool()); + MOCK_METHOD2(PersonalityRoutine, bool(uint64 address, bool indirect)); + MOCK_METHOD2(LanguageSpecificDataArea, bool(uint64 address, bool indirect)); + MOCK_METHOD0(SignalHandler, bool()); +}; + +class MockCallFrameErrorReporter : public CallFrameInfo::Reporter { + public: + MockCallFrameErrorReporter() + : Reporter(gtest_logging_sink_for_LulTestDwarf, "mock filename", + "mock section") {} + MOCK_METHOD2(Incomplete, void(uint64, CallFrameInfo::EntryKind)); + MOCK_METHOD1(EarlyEHTerminator, void(uint64)); + MOCK_METHOD2(CIEPointerOutOfRange, void(uint64, uint64)); + MOCK_METHOD2(BadCIEId, void(uint64, uint64)); + MOCK_METHOD2(UnrecognizedVersion, void(uint64, int version)); + MOCK_METHOD2(UnrecognizedAugmentation, void(uint64, const string&)); + MOCK_METHOD2(InvalidPointerEncoding, void(uint64, uint8)); + MOCK_METHOD2(UnusablePointerEncoding, void(uint64, uint8)); + MOCK_METHOD2(RestoreInCIE, void(uint64, uint64)); + MOCK_METHOD3(BadInstruction, void(uint64, CallFrameInfo::EntryKind, uint64)); + MOCK_METHOD3(NoCFARule, void(uint64, CallFrameInfo::EntryKind, uint64)); + MOCK_METHOD3(EmptyStateStack, void(uint64, CallFrameInfo::EntryKind, uint64)); + MOCK_METHOD3(ClearingCFARule, void(uint64, CallFrameInfo::EntryKind, uint64)); +}; + +struct CFIFixture { + enum { kCFARegister = CallFrameInfo::Handler::kCFARegister }; + + CFIFixture() { + // Default expectations for the data handler. + // + // - Leave Entry and End without expectations, as it's probably a + // good idea to set those explicitly in each test. + // + // - Expect the *Rule functions to not be called, + // so that each test can simply list the calls they expect. + // + // I gather I could use StrictMock for this, but the manual seems + // to suggest using that only as a last resort, and this isn't so + // bad. 
+ EXPECT_CALL(handler, UndefinedRule(_, _)).Times(0); + EXPECT_CALL(handler, SameValueRule(_, _)).Times(0); + EXPECT_CALL(handler, OffsetRule(_, _, _, _)).Times(0); + EXPECT_CALL(handler, ValOffsetRule(_, _, _, _)).Times(0); + EXPECT_CALL(handler, RegisterRule(_, _, _)).Times(0); + EXPECT_CALL(handler, ExpressionRule(_, _, _)).Times(0); + EXPECT_CALL(handler, ValExpressionRule(_, _, _)).Times(0); + EXPECT_CALL(handler, PersonalityRoutine(_, _)).Times(0); + EXPECT_CALL(handler, LanguageSpecificDataArea(_, _)).Times(0); + EXPECT_CALL(handler, SignalHandler()).Times(0); + + // Default expectations for the error/warning reporter. + EXPECT_CALL(reporter, Incomplete(_, _)).Times(0); + EXPECT_CALL(reporter, EarlyEHTerminator(_)).Times(0); + EXPECT_CALL(reporter, CIEPointerOutOfRange(_, _)).Times(0); + EXPECT_CALL(reporter, BadCIEId(_, _)).Times(0); + EXPECT_CALL(reporter, UnrecognizedVersion(_, _)).Times(0); + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, _)).Times(0); + EXPECT_CALL(reporter, InvalidPointerEncoding(_, _)).Times(0); + EXPECT_CALL(reporter, UnusablePointerEncoding(_, _)).Times(0); + EXPECT_CALL(reporter, RestoreInCIE(_, _)).Times(0); + EXPECT_CALL(reporter, BadInstruction(_, _, _)).Times(0); + EXPECT_CALL(reporter, NoCFARule(_, _, _)).Times(0); + EXPECT_CALL(reporter, EmptyStateStack(_, _, _)).Times(0); + EXPECT_CALL(reporter, ClearingCFARule(_, _, _)).Times(0); + } + + MockCallFrameInfoHandler handler; + MockCallFrameErrorReporter reporter; +}; + +class LulDwarfCFI : public CFIFixture, public Test {}; + +TEST_F(LulDwarfCFI, EmptyRegion) { + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + static const char data[1] = {42}; + + ByteReader reader(ENDIANNESS_BIG); + CallFrameInfo parser(data, 0, &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +TEST_F(LulDwarfCFI, IncompleteLength32) { + CFISection section(kBigEndian, 8); + section + // Not even long enough for an initial length. 
+ .D16(0xa0f) + // Padding to keep valgrind happy. We subtract these off when we + // construct the parser. + .D16(0); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size() - 2, &reader, &handler, + &reporter); + EXPECT_FALSE(parser.Start()); +} + +TEST_F(LulDwarfCFI, IncompleteLength64) { + CFISection section(kLittleEndian, 4); + section + // An incomplete 64-bit DWARF initial length. + .D32(0xffffffff) + .D32(0x71fbaec2) + // Padding to keep valgrind happy. We subtract these off when we + // construct the parser. + .D32(0); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size() - 4, &reader, &handler, + &reporter); + EXPECT_FALSE(parser.Start()); +} + +TEST_F(LulDwarfCFI, IncompleteId32) { + CFISection section(kBigEndian, 8); + section + .D32(3) // Initial length, not long enough for id + .D8(0xd7) + .D8(0xe5) + .D8(0xf1) // incomplete id + .CIEHeader(8727, 3983, 8889, 3, "") + .FinishEntry(); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler, + &reporter); + 
EXPECT_FALSE(parser.Start()); +} + +TEST_F(LulDwarfCFI, BadId32) { + CFISection section(kBigEndian, 8); + section + .D32(0x100) // Initial length + .D32(0xe802fade) // bogus ID + .Append(0x100 - 4, 0x42); // make the length true + section.CIEHeader(1672, 9872, 8529, 3, "").FinishEntry(); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, CIEPointerOutOfRange(_, 0xe802fade)).WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler, + &reporter); + EXPECT_FALSE(parser.Start()); +} + +// A lone CIE shouldn't cause any handler calls. +TEST_F(LulDwarfCFI, SingleCIE) { + CFISection section(kLittleEndian, 4); + section.CIEHeader(0xffe799a8, 0x3398dcdd, 0x6e9683de, 3, ""); + section.Append(10, lul::DW_CFA_nop); + section.FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("SingleCIE", section); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler, + &reporter); + EXPECT_TRUE(parser.Start()); +} + +// One FDE, one CIE. 
+TEST_F(LulDwarfCFI, OneFDE) { + CFISection section(kBigEndian, 4); + Label cie; + section.Mark(&cie) + .CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "") + .FinishEntry() + .FDEHeader(cie, 0x7714740d, 0x3d5a10cd) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("OneFDE", section); + + { + InSequence s; + EXPECT_CALL(handler, Entry(_, 0x7714740d, 0x3d5a10cd, 3, "", 0x6b6efb87)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler, + &reporter); + EXPECT_TRUE(parser.Start()); +} + +// Two FDEs share a CIE. +TEST_F(LulDwarfCFI, TwoFDEsOneCIE) { + CFISection section(kBigEndian, 4); + Label cie; + section + // First FDE. readelf complains about this one because it makes + // a forward reference to its CIE. + .FDEHeader(cie, 0xa42744df, 0xa3b42121) + .FinishEntry() + // CIE. + .Mark(&cie) + .CIEHeader(0x04f7dc7b, 0x3d00c05f, 0xbd43cb59, 3, "") + .FinishEntry() + // Second FDE. + .FDEHeader(cie, 0x6057d391, 0x700f608d) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsOneCIE", section); + + { + InSequence s; + EXPECT_CALL(handler, Entry(_, 0xa42744df, 0xa3b42121, 3, "", 0xbd43cb59)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + { + InSequence s; + EXPECT_CALL(handler, Entry(_, 0x6057d391, 0x700f608d, 3, "", 0xbd43cb59)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler, + &reporter); + EXPECT_TRUE(parser.Start()); +} + +// Two FDEs, two CIEs. 
+TEST_F(LulDwarfCFI, TwoFDEsTwoCIEs) {
+  CFISection section(kLittleEndian, 8);
+  Label cie1, cie2;
+  section
+      // First CIE.
+      .Mark(&cie1)
+      .CIEHeader(0x694d5d45, 0x4233221b, 0xbf45e65a, 3, "")
+      .FinishEntry()
+      // First FDE which cites second CIE. readelf complains about
+      // this one because it makes a forward reference to its CIE.
+      .FDEHeader(cie2, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL)
+      .FinishEntry()
+      // Second FDE, which cites first CIE.
+      .FDEHeader(cie1, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL)
+      .FinishEntry()
+      // Second CIE.
+      .Mark(&cie2)
+      .CIEHeader(0xfba3fad7, 0x6287e1fd, 0x61d2c581, 2, "")
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsTwoCIEs", section);
+
+  // Each FDE must be reported with the version and return column of the
+  // CIE it cites, not of the CIE that happens to precede it.
+  {
+    InSequence s;
+    EXPECT_CALL(handler, Entry(_, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL,
+                               2, "", 0x61d2c581))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+  {
+    InSequence s;
+    EXPECT_CALL(handler, Entry(_, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL,
+                               3, "", 0xbf45e65a))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_LITTLE);
+  reader.SetAddressSize(8);
+  CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler,
+                       &reporter);
+  EXPECT_TRUE(parser.Start());
+}
+
+// An FDE whose CIE specifies a version we don't recognize.
+TEST_F(LulDwarfCFI, BadVersion) {
+  CFISection section(kBigEndian, 4);
+  Label cie1, cie2;
+  section.Mark(&cie1)
+      .CIEHeader(0xca878cf0, 0x7698ec04, 0x7b616f54, 0x52, "")
+      .FinishEntry()
+      // We should skip this entry, as its CIE specifies a version we
+      // don't recognize.
+      .FDEHeader(cie1, 0x08852292, 0x2204004a)
+      .FinishEntry()
+      // Despite the above, we should visit this entry.
+      .Mark(&cie2)
+      .CIEHeader(0x7c3ae7c9, 0xb9b9a512, 0x96cb3264, 3, "")
+      .FinishEntry()
+      .FDEHeader(cie2, 0x2094735a, 0x6e875501)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("BadVersion", section);
+
+  EXPECT_CALL(reporter, UnrecognizedVersion(_, 0x52)).WillOnce(Return());
+
+  {
+    InSequence s;
+    // We should see no mention of the first FDE, but we should get
+    // a call to Entry for the second.
+    EXPECT_CALL(handler, Entry(_, 0x2094735a, 0x6e875501, 3, "", 0x96cb3264))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler,
+                       &reporter);
+  // The second FDE is still visited, but Start() is expected to return
+  // false for this section.
+  EXPECT_FALSE(parser.Start());
+}
+
+// An FDE whose CIE specifies an augmentation we don't recognize.
+TEST_F(LulDwarfCFI, BadAugmentation) {
+  CFISection section(kBigEndian, 4);
+  Label cie1, cie2;
+  section.Mark(&cie1)
+      .CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "spaniels!")
+      .FinishEntry()
+      // We should skip this entry, as its CIE specifies an
+      // augmentation we don't recognize.
+      .FDEHeader(cie1, 0x7714740d, 0x3d5a10cd)
+      .FinishEntry()
+      // Despite the above, we should visit this entry.
+      .Mark(&cie2)
+      .CIEHeader(0xf8bc4399, 0x8cf09931, 0xf2f519b2, 3, "")
+      .FinishEntry()
+      .FDEHeader(cie2, 0x7bf0fda0, 0xcbcd28d8)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("BadAugmentation", section);
+
+  EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "spaniels!"))
+      .WillOnce(Return());
+
+  {
+    InSequence s;
+    // We should see no mention of the first FDE, but we should get
+    // a call to Entry for the second.
+    EXPECT_CALL(handler, Entry(_, 0x7bf0fda0, 0xcbcd28d8, 3, "", 0xf2f519b2))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler,
+                       &reporter);
+  // The second FDE is still visited, but Start() is expected to return
+  // false for this section.
+  EXPECT_FALSE(parser.Start());
+}
+
+// The return address column field is a byte in CFI version 1
+// (DWARF2), but a ULEB128 value in version 3 (DWARF3).
+TEST_F(LulDwarfCFI, CIEVersion1ReturnColumn) {
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      // CIE, using the version 1 format: return column is a ubyte.
+      .Mark(&cie)
+      // Use a value for the return column that is parsed differently
+      // as a ubyte and as a ULEB128.
+      .CIEHeader(0xbcdea24f, 0x5be28286, 0x9f, 1, "")
+      .FinishEntry()
+      // FDE, citing that CIE.
+      .FDEHeader(cie, 0xb8d347b5, 0x825e55dc)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion1ReturnColumn", section);
+
+  {
+    InSequence s;
+    EXPECT_CALL(handler, Entry(_, 0xb8d347b5, 0x825e55dc, 1, "", 0x9f))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler,
+                       &reporter);
+  EXPECT_TRUE(parser.Start());
+}
+
+// The return address column field is a byte in CFI version 1
+// (DWARF2), but a ULEB128 value in version 3 (DWARF3).
+TEST_F(LulDwarfCFI, CIEVersion3ReturnColumn) {
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      // CIE, using the version 3 format: return column is a ULEB128.
+      .Mark(&cie)
+      // Use a value for the return column that is parsed differently
+      // as a ubyte and as a ULEB128.
+      .CIEHeader(0x0ab4758d, 0xc010fdf7, 0x89, 3, "")
+      .FinishEntry()
+      // FDE, citing that CIE.
+      .FDEHeader(cie, 0x86763f2b, 0x2a66dc23)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion3ReturnColumn", section);
+
+  {
+    InSequence s;
+    EXPECT_CALL(handler, Entry(_, 0x86763f2b, 0x2a66dc23, 3, "", 0x89))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler,
+                       &reporter);
+  EXPECT_TRUE(parser.Start());
+}
+
+// Fixture for tests of individual call frame instructions. It holds the
+// parameters of a stock CIE/FDE pair plus the gmock Sequence `s` on which
+// StockCIEAndFDE registers its expectations, so that tests can append
+// their own expectations to the same sequence.
+struct CFIInsnFixture : public CFIFixture {
+  // Set the values that do not depend on the section's address size;
+  // code_factor, fde_start and fde_size are assigned by StockCIEAndFDE
+  // (or directly by tests that build their own CIE).
+  CFIInsnFixture() : CFIFixture() {
+    data_factor = 0xb6f;
+    return_register = 0x9be1ed9f;
+    version = 3;
+    cfa_base_register = 0x383a3aa;
+    cfa_offset = 0xf748;
+  }
+
+  // Prepare SECTION to receive FDE instructions.
+  //
+  // - Append a stock CIE header that establishes the fixture's
+  //   code_factor, data_factor, return_register, version, and
+  //   augmentation values.
+  // - Have the CIE set up a CFA rule using cfa_base_register and
+  //   cfa_offset.
+  // - Append a stock FDE header, referring to the above CIE, for the
+  //   fde_size bytes at fde_start. Choose fde_start and fde_size
+  //   appropriately for the section's address size.
+  // - Set appropriate expectations on handler in sequence s for the
+  //   frame description entry and the CIE's CFA rule.
+  //
+  // On return, SECTION is ready to have FDE instructions appended to
+  // it, and its FinishEntry member called.
+  void StockCIEAndFDE(CFISection* section) {
+    // Choose appropriate constants for our address size.
+    if (section->AddressSize() == 4) {
+      fde_start = 0xc628ecfbU;
+      fde_size = 0x5dee04a2;
+      code_factor = 0x60b;
+    } else {
+      assert(section->AddressSize() == 8);
+      fde_start = 0x0005c57ce7806bd3ULL;
+      fde_size = 0x2699521b5e333100ULL;
+      code_factor = 0x01008e32855274a8ULL;
+    }
+
+    // Create the CIE.
+    (*section)
+        .Mark(&cie_label)
+        .CIEHeader(code_factor, data_factor, return_register, version, "")
+        .D8(lul::DW_CFA_def_cfa)
+        .ULEB128(cfa_base_register)
+        .ULEB128(cfa_offset)
+        .FinishEntry();
+
+    // Create the FDE.
+    section->FDEHeader(cie_label, fde_start, fde_size);
+
+    // Expect an Entry call for the FDE and a ValOffsetRule call for the
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .InSequence(s)
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister,
+                                       cfa_base_register, cfa_offset))
+        .InSequence(s)
+        .WillOnce(Return(true));
+  }
+
+  // Run the contents of SECTION through a CallFrameInfo parser,
+  // expecting parser.Start to return SUCCEEDS. Caller may optionally
+  // supply, via READER, its own ByteReader. If that's absent, a
+  // local one is used.
+  void ParseSection(CFISection* section, bool succeeds = true,
+                    ByteReader* reader = nullptr) {
+    string contents;
+    EXPECT_TRUE(section->GetContents(&contents));
+    // Translate the section's endianness into the ByteReader's enum.
+    lul::Endianness endianness;
+    if (section->endianness() == kBigEndian)
+      endianness = ENDIANNESS_BIG;
+    else {
+      assert(section->endianness() == kLittleEndian);
+      endianness = ENDIANNESS_LITTLE;
+    }
+    ByteReader local_reader(endianness);
+    ByteReader* reader_to_use = reader ? reader : &local_reader;
+    // The address size is applied to whichever reader is used, including
+    // a caller-supplied one.
+    reader_to_use->SetAddressSize(section->AddressSize());
+    CallFrameInfo parser(contents.data(), contents.size(), reader_to_use,
+                         &handler, &reporter);
+    if (succeeds)
+      EXPECT_TRUE(parser.Start());
+    else
+      EXPECT_FALSE(parser.Start());
+  }
+
+  Label cie_label;             // marks the stock CIE within the section
+  Sequence s;                  // ordering for handler expectations
+  uint64 code_factor;          // CIE code alignment factor
+  int data_factor;             // CIE data alignment factor
+  unsigned return_register;    // CIE return address column
+  unsigned version;            // CIE version
+  unsigned cfa_base_register;  // register of the stock CIE's CFA rule
+  int cfa_offset;              // offset of the stock CIE's CFA rule
+  uint64 fde_start, fde_size;  // address range covered by the stock FDE
+};
+
+class LulDwarfCFIInsn : public CFIInsnFixture, public Test {};
+
+// DW_CFA_set_loc: the following rule must be reported at the address
+// given as the instruction's operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_set_loc) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_set_loc)
+      .D32(0xb1ee3e7a)
+      // Use DW_CFA_def_cfa to force a handler call that we can use to
+      // check the effect of the DW_CFA_set_loc.
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x4defb431)
+      .ULEB128(0x6d17b0ee)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_set_loc", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(0xb1ee3e7a, kCFARegister, 0x4defb431, 0x6d17b0ee))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc: the delta embedded in the opcode, scaled by
+// code_factor, is added to the current address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_advance_loc | 0x2a)
+      // Use DW_CFA_def_cfa to force a handler call that we can use to
+      // check the effect of the DW_CFA_advance_loc.
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x5bbb3715)
+      .ULEB128(0x0186c7bf)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc", section);
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + 0x2a * code_factor,
+                                     kCFARegister, 0x5bbb3715, 0x0186c7bf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc1: one-byte delta operand, scaled by code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc1) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_advance_loc1)
+      .D8(0xd8)
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x69d5696a)
+      .ULEB128(0x1eb7fc93)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc1", section);
+
+  EXPECT_CALL(handler, ValOffsetRule((fde_start + 0xd8 * code_factor),
+                                     kCFARegister, 0x69d5696a, 0x1eb7fc93))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc2: two-byte delta operand, scaled by code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc2) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_advance_loc2)
+      .D16(0x3adb)
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x3a368bed)
+      .ULEB128(0x3194ee37)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc2", section);
+
+  EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x3adb * code_factor),
+                                     kCFARegister, 0x3a368bed, 0x3194ee37))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc4: four-byte delta operand, scaled by code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc4) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_advance_loc4)
+      .D32(0x15813c88)
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x135270c5)
+      .ULEB128(0x24bad7cb)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc4", section);
+
+  EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x15813c88ULL * code_factor),
+                                     kCFARegister, 0x135270c5, 0x24bad7cb))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_MIPS_advance_loc8: eight-byte delta operand, scaled by
+// code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_MIPS_advance_loc8) {
+  // NOTE(review): StockCIEAndFDE below reassigns code_factor for the
+  // section's address size, so this assignment has no effect on the CIE
+  // or on the expectation; kept as in the original test.
+  code_factor = 0x2d;
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_MIPS_advance_loc8)
+      .D64(0x3c4f3945b92c14ULL)
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0xe17ed602)
+      .ULEB128(0x3d162e7f)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc8", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule((fde_start + 0x3c4f3945b92c14ULL * code_factor),
+                            kCFARegister, 0xe17ed602, 0x3d162e7f))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa: register and unscaled offset are reported as a
+// ValOffsetRule for the CFA.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x4e363a85)
+      .ULEB128(0x815f9aa7)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_def_cfa", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x4e363a85, 0x815f9aa7))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_sf: the signed offset is scaled by data_factor; both a
+// positive and a negative offset are checked.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_sf) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa_sf)
+      .ULEB128(0x8ccb32b7)
+      .LEB128(0x9ea)
+      .D8(lul::DW_CFA_def_cfa_sf)
+      .ULEB128(0x9b40f5da)
+      .LEB128(-0x40a2)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, 0x8ccb32b7,
+                                     0x9ea * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, 0x9b40f5da,
+                                     -0x40a2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_register: only the CFA's base register changes; the
+// stock CIE's cfa_offset is retained.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_register) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa_register).ULEB128(0x3e7e9363).FinishEntry();
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x3e7e9363, cfa_offset))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_register should have no effect when applied to a
+// non-base/offset rule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_registerBadRule) {
+  ByteReader reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 4);
+  ImageSlice expr("needle in a haystack");
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa_expression)
+      .Block(expr)
+      .D8(lul::DW_CFA_def_cfa_register)
+      .ULEB128(0xf1b49e49)
+      .FinishEntry();
+
+  // Not part of sequence s and allowed any number of times.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, expr))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// DW_CFA_def_cfa_offset: only the CFA's offset changes (unscaled); the
+// stock CIE's base register is retained.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x1e8e3b9b).FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                                     0x1e8e3b9b))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_offset_sf: the signed offset is scaled by data_factor;
+// both a positive and a negative offset are checked.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset_sf) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa_offset_sf)
+      .LEB128(0x970)
+      .D8(lul::DW_CFA_def_cfa_offset_sf)
+      .LEB128(-0x2cd)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                                     0x970 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                                     -0x2cd * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_offset should have no effect when applied to a
+// non-base/offset rule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offsetBadRule) {
+  ByteReader reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  ImageSlice expr("six ways to Sunday");
+  section.D8(lul::DW_CFA_def_cfa_expression)
+      .Block(expr)
+      .D8(lul::DW_CFA_def_cfa_offset)
+      .ULEB128(0x1e8e3b9b)
+      .FinishEntry();
+
+  // Not part of sequence s and allowed any number of times.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, expr))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// DW_CFA_def_cfa_expression: the expression block is reported as a
+// ValExpressionRule for the CFA.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_expression) {
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 8);
+  ImageSlice expr("eating crow");
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_def_cfa_expression).Block(expr).FinishEntry();
+
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, expr))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// DW_CFA_undefined: reported as an UndefinedRule for the register.
+TEST_F(LulDwarfCFIInsn, DW_CFA_undefined) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_undefined).ULEB128(0x300ce45d).FinishEntry();
+
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x300ce45d))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_same_value: reported as a SameValueRule for the register.
+TEST_F(LulDwarfCFIInsn, DW_CFA_same_value) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_same_value).ULEB128(0x3865a760).FinishEntry();
+
+  EXPECT_CALL(handler, SameValueRule(fde_start, 0x3865a760))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset: register number comes from the opcode byte; the offset
+// operand is scaled by data_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_offset | 0x2c).ULEB128(0x9f6).FinishEntry();
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x2c, kCFARegister, 0x9f6 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset_extended: register number is a ULEB128 operand; the
+// offset operand is scaled by data_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_offset_extended)
+      .ULEB128(0x402b)
+      .ULEB128(0xb48)
+      .FinishEntry();
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x402b, kCFARegister, 0xb48 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset_extended_sf: signed offset scaled by data_factor; both a
+// positive and a negative offset are checked.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended_sf) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_offset_extended_sf)
+      .ULEB128(0x997c23ee)
+      .LEB128(0x2d00)
+      .D8(lul::DW_CFA_offset_extended_sf)
+      .ULEB128(0x9519eb82)
+      .LEB128(-0xa77)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x997c23ee, kCFARegister,
+                                  0x2d00 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x9519eb82, kCFARegister,
+                                  -0xa77 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_val_offset: reported as a ValOffsetRule for the register, with
+// the offset scaled by data_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_val_offset)
+      .ULEB128(0x623562fe)
+      .ULEB128(0x673)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x623562fe, kCFARegister,
+                                     0x673 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_val_offset_sf: signed offset scaled by data_factor; both a
+// positive and a negative offset are checked.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset_sf) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_val_offset_sf)
+      .ULEB128(0x6f4f)
+      .LEB128(0xaab)
+      .D8(lul::DW_CFA_val_offset_sf)
+      .ULEB128(0x2483)
+      .LEB128(-0x8a2)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x6f4f, kCFARegister,
+                                     0xaab * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x2483, kCFARegister,
+                                     -0x8a2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_register: reported as a RegisterRule with both register
+// operands.
+TEST_F(LulDwarfCFIInsn, DW_CFA_register) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_register)
+      .ULEB128(0x278d18f9)
+      .ULEB128(0x1a684414)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0x278d18f9, 0x1a684414))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_expression: reported as an ExpressionRule for the register.
+TEST_F(LulDwarfCFIInsn, DW_CFA_expression) {
+  ByteReader reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  ImageSlice expr("plus ça change, plus c'est la même chose");
+  section.D8(lul::DW_CFA_expression)
+      .ULEB128(0xa1619fb2)
+      .Block(expr)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0xa1619fb2, expr))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// DW_CFA_val_expression: reported as a ValExpressionRule for the
+// register.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_expression) {
+  ByteReader reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 4);
+  ImageSlice expr("he who has the gold makes the rules");
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_val_expression)
+      .ULEB128(0xc5e4a9e3)
+      .Block(expr)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xc5e4a9e3, expr))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// DW_CFA_restore: the register's rule reverts to the one the CIE
+// established. This test builds its own CIE and FDE instead of using
+// StockCIEAndFDE.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restore) {
+  CFISection section(kLittleEndian, 8);
+  code_factor = 0x01bd188a9b1fa083ULL;
+  data_factor = -0x1ac8;
+  return_register = 0x8c35b049;
+  version = 2;
+  fde_start = 0x2d70fe998298bbb1ULL;
+  fde_size = 0x46ccc2e63cf0b108ULL;
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(code_factor, data_factor, return_register, version, "")
+      // Provide a CFA rule, because register rules require them.
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x6ca1d50e)
+      .ULEB128(0x372e38e8)
+      // Provide an offset(N) rule for register 0x3c.
+      .D8(lul::DW_CFA_offset | 0x3c)
+      .ULEB128(0xb348)
+      .FinishEntry()
+      // In the FDE...
+      .FDEHeader(cie, fde_start, fde_size)
+      // At a second address, provide a new offset(N) rule for register 0x3c.
+      .D8(lul::DW_CFA_advance_loc | 0x13)
+      .D8(lul::DW_CFA_offset | 0x3c)
+      .ULEB128(0x9a50)
+      // At a third address, restore the original rule for register 0x3c.
+      .D8(lul::DW_CFA_advance_loc | 0x01)
+      .D8(lul::DW_CFA_restore | 0x3c)
+      .FinishEntry();
+
+  {
+    InSequence s;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start, kCFARegister, 0x6ca1d50e, 0x372e38e8))
+        .WillOnce(Return(true));
+    // CIE's rule for register 0x3c.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start, 0x3c, kCFARegister, 0xb348 * data_factor))
+        .WillOnce(Return(true));
+    // FDE's rule for register 0x3c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + 0x13 * code_factor, 0x3c,
+                                    kCFARegister, 0x9a50 * data_factor))
+        .WillOnce(Return(true));
+    // Restore CIE's rule for register 0x3c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + (0x13 + 0x01) * code_factor,
+                                    0x3c, kCFARegister, 0xb348 * data_factor))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_restore for a register the CIE gave no rule: the handler is
+// expected to receive a SameValueRule for it.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restoreNoRule) {
+  CFISection section(kBigEndian, 4);
+  code_factor = 0x005f78143c1c3b82ULL;
+  data_factor = 0x25d0;
+  return_register = 0xe8;
+  version = 1;
+  fde_start = 0x4062e30f;
+  fde_size = 0x5302a389;
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(code_factor, data_factor, return_register, version, "")
+      // Provide a CFA rule, because register rules require them.
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x470aa334)
+      .ULEB128(0x099ef127)
+      .FinishEntry()
+      // In the FDE...
+      .FDEHeader(cie, fde_start, fde_size)
+      // At a second address, provide an offset(N) rule for register 0x2c.
+      .D8(lul::DW_CFA_advance_loc | 0x7)
+      .D8(lul::DW_CFA_offset | 0x2c)
+      .ULEB128(0x1f47)
+      // At a third address, restore the (missing) CIE rule for register 0x2c.
+      .D8(lul::DW_CFA_advance_loc | 0xb)
+      .D8(lul::DW_CFA_restore | 0x2c)
+      .FinishEntry();
+
+  {
+    InSequence s;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start, kCFARegister, 0x470aa334, 0x099ef127))
+        .WillOnce(Return(true));
+    // FDE's rule for register 0x2c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + 0x7 * code_factor, 0x2c,
+                                    kCFARegister, 0x1f47 * data_factor))
+        .WillOnce(Return(true));
+    // Restore CIE's (missing) rule for register 0x2c.
+    EXPECT_CALL(handler,
+                SameValueRule(fde_start + (0x7 + 0xb) * code_factor, 0x2c))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_restore_extended: like DW_CFA_restore, but the register number
+// is a ULEB128 operand. The CIE here is written with the dwarf64 flag.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restore_extended) {
+  CFISection section(kBigEndian, 4);
+  code_factor = 0x126e;
+  data_factor = -0xd8b;
+  return_register = 0x77711787;
+  version = 3;
+  fde_start = 0x01f55a45;
+  fde_size = 0x452adb80;
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(code_factor, data_factor, return_register, version, "",
+                 true /* dwarf64 */)
+      // Provide a CFA rule, because register rules require them.
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x56fa0edd)
+      .ULEB128(0x097f78a5)
+      // Provide an offset(N) rule for register 0x0f9b8a1c.
+      .D8(lul::DW_CFA_offset_extended)
+      .ULEB128(0x0f9b8a1c)
+      .ULEB128(0xc979)
+      .FinishEntry()
+      // In the FDE...
+      .FDEHeader(cie, fde_start, fde_size)
+      // At a second address, provide a new offset(N) rule for reg 0x0f9b8a1c.
+      .D8(lul::DW_CFA_advance_loc | 0x3)
+      .D8(lul::DW_CFA_offset_extended)
+      .ULEB128(0x0f9b8a1c)
+      .ULEB128(0x3b7b)
+      // At a third address, restore the original rule for register 0x0f9b8a1c.
+      .D8(lul::DW_CFA_advance_loc | 0x04)
+      .D8(lul::DW_CFA_restore_extended)
+      .ULEB128(0x0f9b8a1c)
+      .FinishEntry();
+
+  {
+    InSequence s;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start, kCFARegister, 0x56fa0edd, 0x097f78a5))
+        .WillOnce(Return(true));
+    // CIE's rule for register 0x0f9b8a1c.
+    EXPECT_CALL(handler, OffsetRule(fde_start, 0x0f9b8a1c, kCFARegister,
+                                    0xc979 * data_factor))
+        .WillOnce(Return(true));
+    // FDE's rule for register 0x0f9b8a1c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + 0x3 * code_factor, 0x0f9b8a1c,
+                                    kCFARegister, 0x3b7b * data_factor))
+        .WillOnce(Return(true));
+    // Restore CIE's rule for register 0x0f9b8a1c.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + (0x3 + 0x4) * code_factor, 0x0f9b8a1c,
+                           kCFARegister, 0xc979 * data_factor))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_remember_state / DW_CFA_restore_state: after the restore, the
+// handler must be told about every register whose rule differs between
+// the discarded and the restored rule sets.
+TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_state) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // We create a state, save it, modify it, and then restore. We
+  // refer to the state that is overridden by the restore as the
+  // "outgoing" state, and the restored state the "incoming" state.
+  //
+  // Register   outgoing      incoming      expect
+  // 1          offset(N)     no rule       new "same value" rule
+  // 2          register(R)   offset(N)     report changed rule
+  // 3          offset(N)     offset(M)     report changed offset
+  // 4          offset(N)     offset(N)     no report
+  // 5          offset(N)     no rule       new "same value" rule
+  section
+      // Create the "incoming" state, which we will save and later restore.
+      .D8(lul::DW_CFA_offset | 2)
+      .ULEB128(0x9806)
+      .D8(lul::DW_CFA_offset | 3)
+      .ULEB128(0x995d)
+      .D8(lul::DW_CFA_offset | 4)
+      .ULEB128(0x7055)
+      .D8(lul::DW_CFA_remember_state)
+      // Advance to a new instruction; an implementation could legitimately
+      // ignore all but the final rule for a given register at a given address.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      // Create the "outgoing" state, which we will discard.
+      .D8(lul::DW_CFA_offset | 1)
+      .ULEB128(0xea1a)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(2)
+      .ULEB128(0x1d2a3767)
+      .D8(lul::DW_CFA_offset | 3)
+      .ULEB128(0xdd29)
+      .D8(lul::DW_CFA_offset | 5)
+      .ULEB128(0xf1ce)
+      // At a third address, restore the incoming state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  uint64 addr = fde_start;
+
+  // Expect the incoming rules to be reported.
+  EXPECT_CALL(handler, OffsetRule(addr, 2, kCFARegister, 0x9806 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0x995d * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 4, kCFARegister, 0x7055 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  addr += code_factor;
+
+  // After the save, we establish the outgoing rule set.
+  EXPECT_CALL(handler, OffsetRule(addr, 1, kCFARegister, 0xea1a * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(addr, 2, 0x1d2a3767))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0xdd29 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 5, kCFARegister, 0xf1ce * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  addr += code_factor;
+
+  // Finally, after the restore, expect to see the differences from
+  // the outgoing to the incoming rules reported.
+  EXPECT_CALL(handler, SameValueRule(addr, 1))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 2, kCFARegister, 0x9806 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0x995d * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(addr, 5))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// Check that restoring a rule set reports changes to the CFA rule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_stateCFA) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Save the stock state, change only the CFA offset one step later, then
+  // restore one further step later.
+  section.D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_def_cfa_offset)
+      .ULEB128(0x90481102)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, kCFARegister,
+                                     cfa_base_register, 0x90481102))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // The restore must report the stock CIE's CFA offset again.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor * 2, kCFARegister,
+                                     cfa_base_register, cfa_offset))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_nop: nops before and after another instruction produce no
+// handler calls of their own.
+TEST_F(LulDwarfCFIInsn, DW_CFA_nop) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_nop)
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x3fb8d4f1)
+      .ULEB128(0x078dc67b)
+      .D8(lul::DW_CFA_nop)
+      .FinishEntry();
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x3fb8d4f1, 0x078dc67b))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_GNU_window_save: a single opcode that must expand into
+// RegisterRules for registers 8-15 and OffsetRules for registers 16-31.
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_window_save) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_GNU_window_save).FinishEntry();
+
+  // Don't include all the rules in any particular sequence.
+
+  // The caller's %o0-%o7 have become the callee's %i0-%i7. This is
+  // the GCC register numbering.
+  for (int i = 8; i < 16; i++)
+    EXPECT_CALL(handler, RegisterRule(fde_start, i, i + 16))
+        .WillOnce(Return(true));
+  // The caller's %l0-%l7 and %i0-%i7 have been saved at the top of
+  // its frame.
+  for (int i = 16; i < 32; i++)
+    EXPECT_CALL(handler, OffsetRule(fde_start, i, kCFARegister, (i - 16) * 4))
+        .WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_GNU_args_size: its ULEB128 operand must be consumed without
+// producing a handler call.
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_args_size) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_GNU_args_size)
+      .ULEB128(0xeddfa520)
+      // Verify that we see this, meaning we parsed the above properly.
+      .D8(lul::DW_CFA_offset | 0x23)
+      .ULEB128(0x269)
+      .FinishEntry();
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x23, kCFARegister, 0x269 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_GNU_negative_offset_extended: the unsigned offset operand is
+// negated before being scaled by data_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_negative_offset_extended) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_GNU_negative_offset_extended)
+      .ULEB128(0x430cc87a)
+      .ULEB128(0x613)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x430cc87a, kCFARegister,
+                                  -0x613 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// Three FDEs: skip the second
+TEST_F(LulDwarfCFIInsn, SkipFDE) {
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      // CIE, used by all FDEs.
+      .Mark(&cie)
+      .CIEHeader(0x010269f2, 0x9177, 0xedca5849, 2, "")
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0x42ed390b)
+      .ULEB128(0x98f43aad)
+      .FinishEntry()
+      // First FDE.
+      .FDEHeader(cie, 0xa870ebdd, 0x60f6aa4)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(0x3a860351)
+      .ULEB128(0x6c9a6bcf)
+      .FinishEntry()
+      // Second FDE.
+      .FDEHeader(cie, 0xc534f7c0, 0xf6552e9, true /* dwarf64 */)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(0x1b62c234)
+      .ULEB128(0x26586b18)
+      .FinishEntry()
+      // Third FDE.
+      .FDEHeader(cie, 0xf681cfc8, 0x7e4594e)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(0x26c53934)
+      .ULEB128(0x18eeb8a4)
+      .FinishEntry();
+
+  {
+    InSequence s;
+
+    // Process the first FDE.
+    EXPECT_CALL(handler, Entry(_, 0xa870ebdd, 0x60f6aa4, 2, "", 0xedca5849))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler,
+                ValOffsetRule(0xa870ebdd, kCFARegister, 0x42ed390b, 0x98f43aad))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, RegisterRule(0xa870ebdd, 0x3a860351, 0x6c9a6bcf))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+    // Skip the second FDE. Entry returns false, and no rule or End
+    // calls are expected for it.
+    EXPECT_CALL(handler, Entry(_, 0xc534f7c0, 0xf6552e9, 2, "", 0xedca5849))
+        .WillOnce(Return(false));
+
+    // Process the third FDE.
+    EXPECT_CALL(handler, Entry(_, 0xf681cfc8, 0x7e4594e, 2, "", 0xedca5849))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler,
+                ValOffsetRule(0xf681cfc8, kCFARegister, 0x42ed390b, 0x98f43aad))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, RegisterRule(0xf681cfc8, 0x26c53934, 0x18eeb8a4))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// Quit processing in the middle of an entry's instructions.
+TEST_F(LulDwarfCFIInsn, QuitMidentry) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_register) + .ULEB128(0xe0cf850d) + .ULEB128(0x15aab431) + .D8(lul::DW_CFA_expression) + .ULEB128(0x46750aa5) + .Block("meat") + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0xe0cf850d, 0x15aab431)) + .InSequence(s) + .WillOnce(Return(false)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, false); +} + +class LulDwarfCFIRestore : public CFIInsnFixture, public Test {}; + +TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_undefined) + .ULEB128(0x0bac878e) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, UndefinedRule(fde_start, 0x0bac878e)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_undefined) + .ULEB128(0x7dedff5f) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_same_value) + .ULEB128(0x7dedff5f) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, UndefinedRule(fde_start, 0x7dedff5f)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, SameValueRule(fde_start + code_factor, 0x7dedff5f)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + 2 * code_factor, 0x7dedff5f)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleUnchanged) { + CFISection section(kLittleEndian, 4); + 
StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_same_value) + .ULEB128(0xadbc9b3a) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, SameValueRule(fde_start, 0xadbc9b3a)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_same_value) + .ULEB128(0x3d90dcb5) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined) + .ULEB128(0x3d90dcb5) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, SameValueRule(fde_start, 0x3d90dcb5)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x3d90dcb5)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, SameValueRule(fde_start + 2 * code_factor, 0x3d90dcb5)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_offset | 0x14) + .ULEB128(0xb6f) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x14, kCFARegister, 0xb6f * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_offset | 0x21) + .ULEB128(0xeb7) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + 
.D8(lul::DW_CFA_undefined) + .ULEB128(0x21) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x21, kCFARegister, 0xeb7 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x21)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21, + kCFARegister, 0xeb7 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChangedOffset) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_offset | 0x21) + .ULEB128(0x134) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_offset | 0x21) + .ULEB128(0xf4f) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x21, kCFARegister, 0x134 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(fde_start + code_factor, 0x21, kCFARegister, + 0xf4f * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21, + kCFARegister, 0x134 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_val_offset) + .ULEB128(0x829caee6) + .ULEB128(0xe4c) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x829caee6, kCFARegister, + 0xe4c * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + 
EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_val_offset) + .ULEB128(0xf17c36d6) + .ULEB128(0xeb7) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined) + .ULEB128(0xf17c36d6) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start, 0xf17c36d6, kCFARegister, + 0xeb7 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xf17c36d6)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0xf17c36d6, + kCFARegister, 0xeb7 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChangedValOffset) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_val_offset) + .ULEB128(0x2cf0ab1b) + .ULEB128(0x562) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_val_offset) + .ULEB128(0x2cf0ab1b) + .ULEB128(0xe88) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x2cf0ab1b, kCFARegister, + 0x562 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, 0x2cf0ab1b, + kCFARegister, 0xe88 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0x2cf0ab1b, + kCFARegister, 0x562 * data_factor)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + 
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_register) + .ULEB128(0x77514acc) + .ULEB128(0x464de4ce) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0x77514acc, 0x464de4ce)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_register) + .ULEB128(0xe39acce5) + .ULEB128(0x095f1559) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined) + .ULEB128(0xe39acce5) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0xe39acce5, 0x095f1559)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xe39acce5)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, + RegisterRule(fde_start + 2 * code_factor, 0xe39acce5, 0x095f1559)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChangedRegister) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_register) + .ULEB128(0xd40e21b1) + .ULEB128(0x16607d6a) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_register) + .ULEB128(0xd40e21b1) + .ULEB128(0xbabb4742) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0xd40e21b1, 0x16607d6a)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, + RegisterRule(fde_start + 
code_factor, 0xd40e21b1, 0xbabb4742)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, + RegisterRule(fde_start + 2 * code_factor, 0xd40e21b1, 0x16607d6a)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleUnchanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + ImageSlice dwarf("dwarf"); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_expression) + .ULEB128(0x666ae152) + .Block("dwarf") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ExpressionRule(fde_start, 0x666ae152, dwarf)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + ImageSlice elf("elf"); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_expression) + .ULEB128(0xb5ca5c46) + .Block(elf) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined) + .ULEB128(0xb5ca5c46) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ExpressionRule(fde_start, 0xb5ca5c46, elf)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, + ExpressionRule(fde_start + 2 * code_factor, 0xb5ca5c46, elf)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChangedExpression) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + 
StockCIEAndFDE(§ion); + ImageSlice smurf("smurf"); + ImageSlice orc("orc"); + section.D8(lul::DW_CFA_expression) + .ULEB128(0x500f5739) + .Block(smurf) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_expression) + .ULEB128(0x500f5739) + .Block(orc) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ExpressionRule(fde_start, 0x500f5739, smurf)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ExpressionRule(fde_start + code_factor, 0x500f5739, orc)) + .InSequence(s) + .WillOnce(Return(true)); + // Expectations are not wishes. + EXPECT_CALL(handler, + ExpressionRule(fde_start + 2 * code_factor, 0x500f5739, smurf)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleUnchanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + ImageSlice hideous("hideous"); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_val_expression) + .ULEB128(0x666ae152) + .Block(hideous) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x666ae152, hideous)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + ImageSlice revolting("revolting"); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_val_expression) + .ULEB128(0xb5ca5c46) + .Block(revolting) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined) + .ULEB128(0xb5ca5c46) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + 
PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChanged", section); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xb5ca5c46, revolting)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor, + 0xb5ca5c46, revolting)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChangedValExpression) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + ImageSlice repulsive("repulsive"); + ImageSlice nauseous("nauseous"); + StockCIEAndFDE(§ion); + section.D8(lul::DW_CFA_val_expression) + .ULEB128(0x500f5739) + .Block(repulsive) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_val_expression) + .ULEB128(0x500f5739) + .Block(nauseous) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChangedValExpression", + section); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x500f5739, repulsive)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, + ValExpressionRule(fde_start + code_factor, 0x500f5739, nauseous)) + .InSequence(s) + .WillOnce(Return(true)); + // Expectations are not wishes. 
+ EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor, + 0x500f5739, repulsive)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +struct EHFrameFixture : public CFIInsnFixture { + EHFrameFixture() : CFIInsnFixture(), section(kBigEndian, 4, true) { + encoded_pointer_bases.cfi = 0x7f496cb2; + encoded_pointer_bases.text = 0x540f67b6; + encoded_pointer_bases.data = 0xe3eab768; + section.SetEncodedPointerBases(encoded_pointer_bases); + } + CFISection section; + CFISection::EncodedPointerBases encoded_pointer_bases; + + // Parse CFIInsnFixture::ParseSection, but parse the section as + // .eh_frame data, supplying stock base addresses. + void ParseEHFrameSection(CFISection* section, bool succeeds = true) { + EXPECT_TRUE(section->ContainsEHFrame()); + string contents; + EXPECT_TRUE(section->GetContents(&contents)); + lul::Endianness endianness; + if (section->endianness() == kBigEndian) + endianness = ENDIANNESS_BIG; + else { + assert(section->endianness() == kLittleEndian); + endianness = ENDIANNESS_LITTLE; + } + ByteReader reader(endianness); + reader.SetAddressSize(section->AddressSize()); + reader.SetCFIDataBase(encoded_pointer_bases.cfi, contents.data()); + reader.SetTextBase(encoded_pointer_bases.text); + reader.SetDataBase(encoded_pointer_bases.data); + CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler, + &reporter, true); + if (succeeds) + EXPECT_TRUE(parser.Start()); + else + EXPECT_FALSE(parser.Start()); + } +}; + +class LulDwarfEHFrame : public EHFrameFixture, public Test {}; + +// A simple CIE, an FDE, and a terminator. 
+TEST_F(LulDwarfEHFrame, Terminator) { + Label cie; + section.Mark(&cie) + .CIEHeader(9968, 2466, 67, 1, "") + .D8(lul::DW_CFA_def_cfa) + .ULEB128(3772) + .ULEB128(1372) + .FinishEntry() + .FDEHeader(cie, 0x848037a1, 0x7b30475e) + .D8(lul::DW_CFA_set_loc) + .D32(0x17713850) + .D8(lul::DW_CFA_undefined) + .ULEB128(5721) + .FinishEntry() + .D32(0) // Terminate the sequence. + // This FDE should be ignored. + .FDEHeader(cie, 0xf19629fe, 0x439fb09b) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.Terminator", section); + + EXPECT_CALL(handler, Entry(_, 0x848037a1, 0x7b30475e, 1, "", 67)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0x848037a1, kCFARegister, 3772, 1372)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0x17713850, 5721)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(reporter, EarlyEHTerminator(_)).InSequence(s).WillOnce(Return()); + + ParseEHFrameSection(§ion); +} + +// The parser should recognize the Linux Standards Base 'z' augmentations. 
+TEST_F(LulDwarfEHFrame, SimpleFDE) { + lul::DwarfPointerEncoding lsda_encoding = lul::DwarfPointerEncoding( + lul::DW_EH_PE_indirect | lul::DW_EH_PE_datarel | lul::DW_EH_PE_sdata2); + lul::DwarfPointerEncoding fde_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel | lul::DW_EH_PE_udata2); + + section.SetPointerEncoding(fde_encoding); + section.SetEncodedPointerBases(encoded_pointer_bases); + Label cie; + section.Mark(&cie) + .CIEHeader(4873, 7012, 100, 1, "zSLPR") + .ULEB128(7) // Augmentation data length + .D8(lsda_encoding) // LSDA pointer format + .D8(lul::DW_EH_PE_pcrel) // personality pointer format + .EncodedPointer(0x97baa00, lul::DW_EH_PE_pcrel) // and value + .D8(fde_encoding) // FDE pointer format + .D8(lul::DW_CFA_def_cfa) + .ULEB128(6706) + .ULEB128(31) + .FinishEntry() + .FDEHeader(cie, 0x540f6b56, 0xf686) + .ULEB128(2) // Augmentation data length + .EncodedPointer(0xe3eab475, lsda_encoding) // LSDA pointer, signed + .D8(lul::DW_CFA_set_loc) + .EncodedPointer(0x540fa4ce, fde_encoding) + .D8(lul::DW_CFA_undefined) + .ULEB128(0x675e) + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.SimpleFDE", section); + + EXPECT_CALL(handler, Entry(_, 0x540f6b56, 0xf686, 1, "zSLPR", 100)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, PersonalityRoutine(0x97baa00, false)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, LanguageSpecificDataArea(0xe3eab475, true)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, SignalHandler()).InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0x540f6b56, kCFARegister, 6706, 31)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0x540fa4ce, 0x675e)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +// Check that we can handle an empty 'z' augmentation. 
+TEST_F(LulDwarfEHFrame, EmptyZ) { + Label cie; + section.Mark(&cie) + .CIEHeader(5955, 5805, 228, 1, "z") + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_def_cfa) + .ULEB128(3629) + .ULEB128(247) + .FinishEntry() + .FDEHeader(cie, 0xda007738, 0xfb55c641) + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_advance_loc1) + .D8(11) + .D8(lul::DW_CFA_undefined) + .ULEB128(3769) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.EmptyZ", section); + + EXPECT_CALL(handler, Entry(_, 0xda007738, 0xfb55c641, 1, "z", 228)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0xda007738, kCFARegister, 3629, 247)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0xda007738 + 11 * 5955, 3769)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +// Check that we recognize bad 'z' augmentation characters. +TEST_F(LulDwarfEHFrame, BadZ) { + Label cie; + section.Mark(&cie) + .CIEHeader(6937, 1045, 142, 1, "zQ") + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_def_cfa) + .ULEB128(9006) + .ULEB128(7725) + .FinishEntry() + .FDEHeader(cie, 0x1293efa8, 0x236f53f2) + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_advance_loc | 12) + .D8(lul::DW_CFA_register) + .ULEB128(5667) + .ULEB128(3462) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.BadZ", section); + + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "zQ")).WillOnce(Return()); + + ParseEHFrameSection(§ion, false); +} + +TEST_F(LulDwarfEHFrame, zL) { + Label cie; + lul::DwarfPointerEncoding lsda_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_funcrel | lul::DW_EH_PE_udata2); + section.Mark(&cie) + .CIEHeader(9285, 9959, 54, 1, "zL") + .ULEB128(1) // Augmentation data length + .D8(lsda_encoding) // encoding for LSDA pointer in FDE + + .FinishEntry() + .FDEHeader(cie, 0xd40091aa, 0x9aa6e746) + .ULEB128(2) // 
Augmentation data length + .EncodedPointer(0xd40099cd, lsda_encoding) // LSDA pointer + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zL", section); + + EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zL", 54)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, LanguageSpecificDataArea(0xd40099cd, false)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +TEST_F(LulDwarfEHFrame, zP) { + Label cie; + lul::DwarfPointerEncoding personality_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_datarel | lul::DW_EH_PE_udata2); + section.Mark(&cie) + .CIEHeader(1097, 6313, 17, 1, "zP") + .ULEB128(3) // Augmentation data length + .D8(personality_encoding) // encoding for personality routine + .EncodedPointer(0xe3eaccac, personality_encoding) // value + .FinishEntry() + .FDEHeader(cie, 0x0c8350c9, 0xbef11087) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zP", section); + + EXPECT_CALL(handler, Entry(_, 0x0c8350c9, 0xbef11087, 1, "zP", 17)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, PersonalityRoutine(0xe3eaccac, false)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +TEST_F(LulDwarfEHFrame, zR) { + Label cie; + lul::DwarfPointerEncoding pointer_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel | lul::DW_EH_PE_sdata2); + section.SetPointerEncoding(pointer_encoding); + section.Mark(&cie) + .CIEHeader(8011, 5496, 75, 1, "zR") + .ULEB128(1) // Augmentation data length + .D8(pointer_encoding) // encoding for FDE addresses + .FinishEntry() + .FDEHeader(cie, 0x540f9431, 0xbd0) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zR", section); 
+ + EXPECT_CALL(handler, Entry(_, 0x540f9431, 0xbd0, 1, "zR", 75)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +TEST_F(LulDwarfEHFrame, zS) { + Label cie; + section.Mark(&cie) + .CIEHeader(9217, 7694, 57, 1, "zS") + .ULEB128(0) // Augmentation data length + .FinishEntry() + .FDEHeader(cie, 0xd40091aa, 0x9aa6e746) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zS", section); + + EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zS", 57)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, SignalHandler()).InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +// These tests require manual inspection of the test output. +struct CFIReporterFixture { + CFIReporterFixture() + : reporter(gtest_logging_sink_for_LulTestDwarf, "test file name", + "test section name") {} + CallFrameInfo::Reporter reporter; +}; + +class LulDwarfCFIReporter : public CFIReporterFixture, public Test {}; + +TEST_F(LulDwarfCFIReporter, Incomplete) { + reporter.Incomplete(0x0102030405060708ULL, CallFrameInfo::kUnknown); +} + +TEST_F(LulDwarfCFIReporter, EarlyEHTerminator) { + reporter.EarlyEHTerminator(0x0102030405060708ULL); +} + +TEST_F(LulDwarfCFIReporter, CIEPointerOutOfRange) { + reporter.CIEPointerOutOfRange(0x0123456789abcdefULL, 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, BadCIEId) { + reporter.BadCIEId(0x0123456789abcdefULL, 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, UnrecognizedVersion) { + reporter.UnrecognizedVersion(0x0123456789abcdefULL, 43); +} + +TEST_F(LulDwarfCFIReporter, UnrecognizedAugmentation) { + reporter.UnrecognizedAugmentation(0x0123456789abcdefULL, "poodles"); +} + +TEST_F(LulDwarfCFIReporter, InvalidPointerEncoding) { + 
reporter.InvalidPointerEncoding(0x0123456789abcdefULL, 0x42); +} + +TEST_F(LulDwarfCFIReporter, UnusablePointerEncoding) { + reporter.UnusablePointerEncoding(0x0123456789abcdefULL, 0x42); +} + +TEST_F(LulDwarfCFIReporter, RestoreInCIE) { + reporter.RestoreInCIE(0x0123456789abcdefULL, 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, BadInstruction) { + reporter.BadInstruction(0x0123456789abcdefULL, CallFrameInfo::kFDE, + 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, NoCFARule) { + reporter.NoCFARule(0x0123456789abcdefULL, CallFrameInfo::kCIE, + 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, EmptyStateStack) { + reporter.EmptyStateStack(0x0123456789abcdefULL, CallFrameInfo::kTerminator, + 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, ClearingCFARule) { + reporter.ClearingCFARule(0x0123456789abcdefULL, CallFrameInfo::kFDE, + 0xfedcba9876543210ULL); +} +class LulDwarfExpr : public Test {}; + +class MockSummariser : public Summariser { + public: + MockSummariser() : Summariser(nullptr, 0, nullptr) {} + MOCK_METHOD2(Entry, void(uintptr_t, uintptr_t)); + MOCK_METHOD0(End, void()); + MOCK_METHOD5(Rule, void(uintptr_t, int, LExprHow, int16_t, int64_t)); + MOCK_METHOD1(AddPfxInstr, uint32_t(PfxInstr)); +}; + +TEST_F(LulDwarfExpr, SimpleTransliteration) { + MockSummariser summ; + ByteReader reader(ENDIANNESS_LITTLE); + + CFISection section(kLittleEndian, 8); + section.D8(DW_OP_lit0) + .D8(DW_OP_lit31) + .D8(DW_OP_breg0 + 17) + .LEB128(-1234) + .D8(DW_OP_const4s) + .D32(0xFEDC9876) + .D8(DW_OP_deref) + .D8(DW_OP_and) + .D8(DW_OP_plus) + .D8(DW_OP_minus) + .D8(DW_OP_shl) + .D8(DW_OP_ge); + string expr; + bool ok = section.GetContents(&expr); + EXPECT_TRUE(ok); + + { + InSequence s; + // required start marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0))); + // DW_OP_lit0 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0))); + // DW_OP_lit31 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 31))); + // DW_OP_breg17 -1234 
+ EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_DwReg, 17))); + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, -1234))); + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add))); + // DW_OP_const4s 0xFEDC9876 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0xFEDC9876))); + // DW_OP_deref + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Deref))); + // DW_OP_and + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_And))); + // DW_OP_plus + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add))); + // DW_OP_minus + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Sub))); + // DW_OP_shl + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Shl))); + // DW_OP_ge + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_CmpGES))); + // required end marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_End))); + } + + int32_t ix = + parseDwarfExpr(&summ, &reader, ImageSlice(expr), false, false, false); + EXPECT_TRUE(ix >= 0); +} + +TEST_F(LulDwarfExpr, UnknownOpcode) { + MockSummariser summ; + ByteReader reader(ENDIANNESS_LITTLE); + + CFISection section(kLittleEndian, 8); + section.D8(DW_OP_lo_user - 1); + string expr; + bool ok = section.GetContents(&expr); + EXPECT_TRUE(ok); + + { + InSequence s; + // required start marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0))); + } + + int32_t ix = + parseDwarfExpr(&summ, &reader, ImageSlice(expr), false, false, false); + EXPECT_TRUE(ix == -1); +} + +TEST_F(LulDwarfExpr, ExpressionOverrun) { + MockSummariser summ; + ByteReader reader(ENDIANNESS_LITTLE); + + CFISection section(kLittleEndian, 8); + section.D8(DW_OP_const4s).D8(0x12).D8(0x34).D8(0x56); + string expr; + bool ok = section.GetContents(&expr); + EXPECT_TRUE(ok); + + { + InSequence s; + // required start marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0))); + // DW_OP_const4s followed by 3 (a.k.a. not enough) bytes + // We expect PfxInstr(PX_Simm32, not-known-for-sure-32-bit-immediate) + // Hence must use _ as the argument. 
+ EXPECT_CALL(summ, AddPfxInstr(_)); + } + + int32_t ix = + parseDwarfExpr(&summ, &reader, ImageSlice(expr), false, false, false); + EXPECT_TRUE(ix == -1); +} + +// We'll need to mention specific Dwarf registers in the EvaluatePfxExpr tests, +// and those names are arch-specific, so a bit of macro magic is helpful. +#if defined(GP_ARCH_arm) +# define TESTED_REG_STRUCT_NAME r11 +# define TESTED_REG_DWARF_NAME DW_REG_ARM_R11 +#elif defined(GP_ARCH_arm64) +# define TESTED_REG_STRUCT_NAME x29 +# define TESTED_REG_DWARF_NAME DW_REG_AARCH64_X29 +#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) +# define TESTED_REG_STRUCT_NAME xbp +# define TESTED_REG_DWARF_NAME DW_REG_INTEL_XBP +#else +# error "Unknown plat" +#endif + +struct EvaluatePfxExprFixture { + // Creates: + // initial stack, AVMA 0x12345678, at offset 4 bytes = 0xdeadbeef + // initial regs, with XBP = 0x14141356 + // initial CFA = 0x5432ABCD + EvaluatePfxExprFixture() { + // The test stack. + si.mStartAvma = 0x12345678; + si.mLen = 0; +#define XX(_byte) \ + do { \ + si.mContents[si.mLen++] = (_byte); \ + } while (0) + XX(0x55); + XX(0x55); + XX(0x55); + XX(0x55); + if (sizeof(void*) == 8) { + // le64 + XX(0xEF); + XX(0xBE); + XX(0xAD); + XX(0xDE); + XX(0); + XX(0); + XX(0); + XX(0); + } else { + // le32 + XX(0xEF); + XX(0xBE); + XX(0xAD); + XX(0xDE); + } + XX(0xAA); + XX(0xAA); + XX(0xAA); + XX(0xAA); +#undef XX + // The initial CFA. + initialCFA = TaggedUWord(0x5432ABCD); + // The initial register state. + memset(®s, 0, sizeof(regs)); + regs.TESTED_REG_STRUCT_NAME = TaggedUWord(0x14141356); + } + + StackImage si; + TaggedUWord initialCFA; + UnwindRegs regs; +}; + +class LulDwarfEvaluatePfxExpr : public EvaluatePfxExprFixture, public Test {}; + +TEST_F(LulDwarfEvaluatePfxExpr, NormalEvaluation) { + vector instrs; + // Put some junk at the start of the insn sequence. 
+ instrs.push_back(PfxInstr(PX_End)); + instrs.push_back(PfxInstr(PX_End)); + + // Now the real sequence + // stack is empty + instrs.push_back(PfxInstr(PX_Start, 1)); + // 0x5432ABCD + instrs.push_back(PfxInstr(PX_SImm32, 0x31415927)); + // 0x5432ABCD 0x31415927 + instrs.push_back(PfxInstr(PX_DwReg, TESTED_REG_DWARF_NAME)); + // 0x5432ABCD 0x31415927 0x14141356 + instrs.push_back(PfxInstr(PX_SImm32, 42)); + // 0x5432ABCD 0x31415927 0x14141356 42 + instrs.push_back(PfxInstr(PX_Sub)); + // 0x5432ABCD 0x31415927 0x1414132c + instrs.push_back(PfxInstr(PX_Add)); + // 0x5432ABCD 0x45556c53 + instrs.push_back(PfxInstr(PX_SImm32, si.mStartAvma + 4)); + // 0x5432ABCD 0x45556c53 0x1234567c + instrs.push_back(PfxInstr(PX_Deref)); + // 0x5432ABCD 0x45556c53 0xdeadbeef + instrs.push_back(PfxInstr(PX_SImm32, 0xFE01DC23)); + // 0x5432ABCD 0x45556c53 0xdeadbeef 0xFE01DC23 + instrs.push_back(PfxInstr(PX_And)); + // 0x5432ABCD 0x45556c53 0xde019c23 + instrs.push_back(PfxInstr(PX_SImm32, 7)); + // 0x5432ABCD 0x45556c53 0xde019c23 7 + instrs.push_back(PfxInstr(PX_Shl)); + // 0x5432ABCD 0x45556c53 0x6f00ce1180 + instrs.push_back(PfxInstr(PX_SImm32, 0x7fffffff)); + // 0x5432ABCD 0x45556c53 0x6f00ce1180 7fffffff + instrs.push_back(PfxInstr(PX_And)); + // 0x5432ABCD 0x45556c53 0x00ce1180 + instrs.push_back(PfxInstr(PX_Add)); + // 0x5432ABCD 0x46237dd3 + instrs.push_back(PfxInstr(PX_Sub)); + // 0xe0f2dfa + + instrs.push_back(PfxInstr(PX_End)); + + TaggedUWord res = EvaluatePfxExpr(2 /*offset of start insn*/, ®s, + initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res.Value() == 0xe0f2dfa); +} + +TEST_F(LulDwarfEvaluatePfxExpr, EmptySequence) { + vector instrs; + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, BogusStartPoint) { + vector instrs; + instrs.push_back(PfxInstr(PX_SImm32, 42)); + instrs.push_back(PfxInstr(PX_SImm32, 24)); + instrs.push_back(PfxInstr(PX_SImm32, 4224)); + 
TaggedUWord res = EvaluatePfxExpr(1, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, MissingEndMarker) { + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + instrs.push_back(PfxInstr(PX_SImm32, 24)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackUnderflow) { + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackNoUnderflow) { + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 1 /*push the initial CFA*/)); + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res == initialCFA); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackOverflow) { + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + for (int i = 0; i < 10 + 1; i++) { + instrs.push_back(PfxInstr(PX_SImm32, i + 100)); + } + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackNoOverflow) { + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + for (int i = 0; i < 10 + 0; i++) { + instrs.push_back(PfxInstr(PX_SImm32, i + 100)); + } + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res == TaggedUWord(109)); +} + +TEST_F(LulDwarfEvaluatePfxExpr, OutOfRangeShl) { + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + instrs.push_back(PfxInstr(PX_SImm32, 1234)); + instrs.push_back(PfxInstr(PX_SImm32, 5678)); + instrs.push_back(PfxInstr(PX_Shl)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(!res.Valid()); +} + 
+TEST_F(LulDwarfEvaluatePfxExpr, TestCmpGES) { + const int32_t argsL[6] = {0, 0, 1, -2, -1, -2}; + const int32_t argsR[6] = {0, 1, 0, -2, -2, -1}; + // expecting: t f t t t f = 101110 = 0x2E + vector instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + // The "running total" + instrs.push_back(PfxInstr(PX_SImm32, 0)); + for (unsigned int i = 0; i < sizeof(argsL) / sizeof(argsL[0]); i++) { + // Shift the "running total" at the bottom of the stack left by one bit + instrs.push_back(PfxInstr(PX_SImm32, 1)); + instrs.push_back(PfxInstr(PX_Shl)); + // Push both test args and do the comparison + instrs.push_back(PfxInstr(PX_SImm32, argsL[i])); + instrs.push_back(PfxInstr(PX_SImm32, argsR[i])); + instrs.push_back(PfxInstr(PX_CmpGES)); + // Or the result into the running total + instrs.push_back(PfxInstr(PX_Or)); + } + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res == TaggedUWord(0x2E)); +} + +} // namespace lul diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.cpp b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp new file mode 100644 index 0000000000..6d49557e9c --- /dev/null +++ b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp @@ -0,0 +1,498 @@ +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy + +// Derived from: +// test_assembler.cc: Implementation of google_breakpad::TestAssembler. +// See test_assembler.h for details. + +// Derived from: +// cfi_assembler.cc: Implementation of google_breakpad::CFISection class. +// See cfi_assembler.h for details. 
+ +#include "LulTestInfrastructure.h" + +#include "LulDwarfInt.h" + +#include + +namespace lul_test { +namespace test_assembler { + +using std::back_insert_iterator; + +Label::Label() : value_(new Binding()) {} +Label::Label(uint64_t value) : value_(new Binding(value)) {} +Label::Label(const Label& label) { + value_ = label.value_; + value_->Acquire(); +} +Label::~Label() { + if (value_->Release()) delete value_; +} + +Label& Label::operator=(uint64_t value) { + value_->Set(NULL, value); + return *this; +} + +Label& Label::operator=(const Label& label) { + value_->Set(label.value_, 0); + return *this; +} + +Label Label::operator+(uint64_t addend) const { + Label l; + l.value_->Set(this->value_, addend); + return l; +} + +Label Label::operator-(uint64_t subtrahend) const { + Label l; + l.value_->Set(this->value_, -subtrahend); + return l; +} + +// When NDEBUG is #defined, assert doesn't evaluate its argument. This +// means you can't simply use assert to check the return value of a +// function with necessary side effects. +// +// ALWAYS_EVALUATE_AND_ASSERT(x) evaluates x regardless of whether +// NDEBUG is #defined; when NDEBUG is not #defined, it further asserts +// that x is true. 
+#ifdef NDEBUG +# define ALWAYS_EVALUATE_AND_ASSERT(x) x +#else +# define ALWAYS_EVALUATE_AND_ASSERT(x) assert(x) +#endif + +uint64_t Label::operator-(const Label& label) const { + uint64_t offset; + ALWAYS_EVALUATE_AND_ASSERT(IsKnownOffsetFrom(label, &offset)); + return offset; +} + +bool Label::IsKnownConstant(uint64_t* value_p) const { + Binding* base; + uint64_t addend; + value_->Get(&base, &addend); + if (base != NULL) return false; + if (value_p) *value_p = addend; + return true; +} + +bool Label::IsKnownOffsetFrom(const Label& label, uint64_t* offset_p) const { + Binding *label_base, *this_base; + uint64_t label_addend, this_addend; + label.value_->Get(&label_base, &label_addend); + value_->Get(&this_base, &this_addend); + // If this and label are related, Get will find their final + // common ancestor, regardless of how indirect the relation is. This + // comparison also handles the constant vs. constant case. + if (this_base != label_base) return false; + if (offset_p) *offset_p = this_addend - label_addend; + return true; +} + +Label::Binding::Binding() : base_(this), addend_(), reference_count_(1) {} + +Label::Binding::Binding(uint64_t addend) + : base_(NULL), addend_(addend), reference_count_(1) {} + +Label::Binding::~Binding() { + assert(reference_count_ == 0); + if (base_ && base_ != this && base_->Release()) delete base_; +} + +void Label::Binding::Set(Binding* binding, uint64_t addend) { + if (!base_ && !binding) { + // We're equating two constants. This could be okay. + assert(addend_ == addend); + } else if (!base_) { + // We are a known constant, but BINDING may not be, so turn the + // tables and try to set BINDING's value instead. + binding->Set(NULL, addend_ - addend); + } else { + if (binding) { + // Find binding's final value. 
Since the final value is always either + // completely unconstrained or a constant, never a reference to + // another variable (otherwise, it wouldn't be final), this + // guarantees we won't create cycles here, even for code like this: + // l = m, m = n, n = l; + uint64_t binding_addend; + binding->Get(&binding, &binding_addend); + addend += binding_addend; + } + + // It seems likely that setting a binding to itself is a bug + // (although I can imagine this might turn out to be helpful to + // permit). + assert(binding != this); + + if (base_ != this) { + // Set the other bindings on our chain as well. Note that this + // is sufficient even though binding relationships form trees: + // All binding operations traverse their chains to the end, and + // all bindings related to us share some tail of our chain, so + // they will see the changes we make here. + base_->Set(binding, addend - addend_); + // We're not going to use base_ any more. + if (base_->Release()) delete base_; + } + + // Adopt BINDING as our base. Note that it should be correct to + // acquire here, after the release above, even though the usual + // reference-counting rules call for acquiring first, and then + // releasing: the self-reference assertion above should have + // complained if BINDING were 'this' or anywhere along our chain, + // so we didn't release BINDING. + if (binding) binding->Acquire(); + base_ = binding; + addend_ = addend; + } +} + +void Label::Binding::Get(Binding** base, uint64_t* addend) { + if (base_ && base_ != this) { + // Recurse to find the end of our reference chain (the root of our + // tree), and then rewrite every binding along the chain to refer + // to it directly, adjusting addends appropriately. (This is why + // this member function isn't this-const.) 
+ Binding* final_base; + uint64_t final_addend; + base_->Get(&final_base, &final_addend); + if (final_base) final_base->Acquire(); + if (base_->Release()) delete base_; + base_ = final_base; + addend_ += final_addend; + } + *base = base_; + *addend = addend_; +} + +template +static inline void InsertEndian(test_assembler::Endianness endianness, + size_t size, uint64_t number, Inserter dest) { + assert(size > 0); + if (endianness == kLittleEndian) { + for (size_t i = 0; i < size; i++) { + *dest++ = (char)(number & 0xff); + number >>= 8; + } + } else { + assert(endianness == kBigEndian); + // The loop condition is odd, but it's correct for size_t. + for (size_t i = size - 1; i < size; i--) + *dest++ = (char)((number >> (i * 8)) & 0xff); + } +} + +Section& Section::Append(Endianness endianness, size_t size, uint64_t number) { + InsertEndian(endianness, size, number, + back_insert_iterator(contents_)); + return *this; +} + +Section& Section::Append(Endianness endianness, size_t size, + const Label& label) { + // If this label's value is known, there's no reason to waste an + // entry in references_ on it. + uint64_t value; + if (label.IsKnownConstant(&value)) return Append(endianness, size, value); + + // This will get caught when the references are resolved, but it's + // nicer to find out earlier. + assert(endianness != kUnsetEndian); + + references_.push_back(Reference(contents_.size(), endianness, size, label)); + contents_.append(size, 0); + return *this; +} + +#define ENDIANNESS_L kLittleEndian +#define ENDIANNESS_B kBigEndian +#define ENDIANNESS(e) ENDIANNESS_##e + +#define DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits) \ + Section& Section::e##bits(uint##bits##_t v) { \ + InsertEndian(ENDIANNESS(e), bits / 8, v, \ + back_insert_iterator(contents_)); \ + return *this; \ + } + +#define DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits) \ + Section& Section::e##bits(const Label& v) { \ + return Append(ENDIANNESS(e), bits / 8, v); \ + } + +// Define L16, B32, and friends. 
+#define DEFINE_SHORT_APPEND_ENDIAN(e, bits) \ + DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits) \ + DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits) + +DEFINE_SHORT_APPEND_LABEL_ENDIAN(L, 8); +DEFINE_SHORT_APPEND_LABEL_ENDIAN(B, 8); +DEFINE_SHORT_APPEND_ENDIAN(L, 16); +DEFINE_SHORT_APPEND_ENDIAN(L, 32); +DEFINE_SHORT_APPEND_ENDIAN(L, 64); +DEFINE_SHORT_APPEND_ENDIAN(B, 16); +DEFINE_SHORT_APPEND_ENDIAN(B, 32); +DEFINE_SHORT_APPEND_ENDIAN(B, 64); + +#define DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits) \ + Section& Section::D##bits(uint##bits##_t v) { \ + InsertEndian(endianness_, bits / 8, v, \ + back_insert_iterator(contents_)); \ + return *this; \ + } +#define DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits) \ + Section& Section::D##bits(const Label& v) { \ + return Append(endianness_, bits / 8, v); \ + } +#define DEFINE_SHORT_APPEND_DEFAULT(bits) \ + DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits) \ + DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits) + +DEFINE_SHORT_APPEND_LABEL_DEFAULT(8) +DEFINE_SHORT_APPEND_DEFAULT(16); +DEFINE_SHORT_APPEND_DEFAULT(32); +DEFINE_SHORT_APPEND_DEFAULT(64); + +Section& Section::LEB128(long long value) { + while (value < -0x40 || 0x3f < value) { + contents_ += (value & 0x7f) | 0x80; + if (value < 0) + value = (value >> 7) | ~(((unsigned long long)-1) >> 7); + else + value = (value >> 7); + } + contents_ += value & 0x7f; + return *this; +} + +Section& Section::ULEB128(uint64_t value) { + while (value > 0x7f) { + contents_ += (value & 0x7f) | 0x80; + value = (value >> 7); + } + contents_ += value; + return *this; +} + +Section& Section::Align(size_t alignment, uint8_t pad_byte) { + // ALIGNMENT must be a power of two. 
+ assert(((alignment - 1) & alignment) == 0); + size_t new_size = (contents_.size() + alignment - 1) & ~(alignment - 1); + contents_.append(new_size - contents_.size(), pad_byte); + assert((contents_.size() & (alignment - 1)) == 0); + return *this; +} + +bool Section::GetContents(string* contents) { + // For each label reference, find the label's value, and patch it into + // the section's contents. + for (size_t i = 0; i < references_.size(); i++) { + Reference& r = references_[i]; + uint64_t value; + if (!r.label.IsKnownConstant(&value)) { + fprintf(stderr, "Undefined label #%zu at offset 0x%zx\n", i, r.offset); + return false; + } + assert(r.offset < contents_.size()); + assert(contents_.size() - r.offset >= r.size); + InsertEndian(r.endianness, r.size, value, contents_.begin() + r.offset); + } + contents->clear(); + std::swap(contents_, *contents); + references_.clear(); + return true; +} + +} // namespace test_assembler +} // namespace lul_test + +namespace lul_test { + +CFISection& CFISection::CIEHeader(uint64_t code_alignment_factor, + int data_alignment_factor, + unsigned return_address_register, + uint8_t version, const string& augmentation, + bool dwarf64) { + assert(!entry_length_); + entry_length_ = new PendingLength(); + in_fde_ = false; + + if (dwarf64) { + D32(kDwarf64InitialLengthMarker); + D64(entry_length_->length); + entry_length_->start = Here(); + D64(eh_frame_ ? kEHFrame64CIEIdentifier : kDwarf64CIEIdentifier); + } else { + D32(entry_length_->length); + entry_length_->start = Here(); + D32(eh_frame_ ? 
kEHFrame32CIEIdentifier : kDwarf32CIEIdentifier); + } + D8(version); + AppendCString(augmentation); + ULEB128(code_alignment_factor); + LEB128(data_alignment_factor); + if (version == 1) + D8(return_address_register); + else + ULEB128(return_address_register); + return *this; +} + +CFISection& CFISection::FDEHeader(Label cie_pointer, uint64_t initial_location, + uint64_t address_range, bool dwarf64) { + assert(!entry_length_); + entry_length_ = new PendingLength(); + in_fde_ = true; + fde_start_address_ = initial_location; + + if (dwarf64) { + D32(0xffffffff); + D64(entry_length_->length); + entry_length_->start = Here(); + if (eh_frame_) + D64(Here() - cie_pointer); + else + D64(cie_pointer); + } else { + D32(entry_length_->length); + entry_length_->start = Here(); + if (eh_frame_) + D32(Here() - cie_pointer); + else + D32(cie_pointer); + } + EncodedPointer(initial_location); + // The FDE length in an .eh_frame section uses the same encoding as the + // initial location, but ignores the base address (selected by the upper + // nybble of the encoding), as it's a length, not an address that can be + // made relative. + EncodedPointer(address_range, DwarfPointerEncoding(pointer_encoding_ & 0x0f)); + return *this; +} + +CFISection& CFISection::FinishEntry() { + assert(entry_length_); + Align(address_size_, lul::DW_CFA_nop); + entry_length_->length = Here() - entry_length_->start; + delete entry_length_; + entry_length_ = NULL; + in_fde_ = false; + return *this; +} + +CFISection& CFISection::EncodedPointer(uint64_t address, + DwarfPointerEncoding encoding, + const EncodedPointerBases& bases) { + // Omitted data is extremely easy to emit. + if (encoding == lul::DW_EH_PE_omit) return *this; + + // If (encoding & lul::DW_EH_PE_indirect) != 0, then we assume + // that ADDRESS is the address at which the pointer is stored --- in + // other words, that bit has no effect on how we write the pointer. 
+ encoding = DwarfPointerEncoding(encoding & ~lul::DW_EH_PE_indirect); + + // Find the base address to which this pointer is relative. The upper + // nybble of the encoding specifies this. + uint64_t base; + switch (encoding & 0xf0) { + case lul::DW_EH_PE_absptr: + base = 0; + break; + case lul::DW_EH_PE_pcrel: + base = bases.cfi + Size(); + break; + case lul::DW_EH_PE_textrel: + base = bases.text; + break; + case lul::DW_EH_PE_datarel: + base = bases.data; + break; + case lul::DW_EH_PE_funcrel: + base = fde_start_address_; + break; + case lul::DW_EH_PE_aligned: + base = 0; + break; + default: + abort(); + }; + + // Make ADDRESS relative. Yes, this is appropriate even for "absptr" + // values; see gcc/unwind-pe.h. + address -= base; + + // Align the pointer, if required. + if ((encoding & 0xf0) == lul::DW_EH_PE_aligned) Align(AddressSize()); + + // Append ADDRESS to this section in the appropriate form. For the + // fixed-width forms, we don't need to differentiate between signed and + // unsigned encodings, because ADDRESS has already been extended to 64 + // bits before it was passed to us. + switch (encoding & 0x0f) { + case lul::DW_EH_PE_absptr: + Address(address); + break; + + case lul::DW_EH_PE_uleb128: + ULEB128(address); + break; + + case lul::DW_EH_PE_sleb128: + LEB128(address); + break; + + case lul::DW_EH_PE_udata2: + case lul::DW_EH_PE_sdata2: + D16(address); + break; + + case lul::DW_EH_PE_udata4: + case lul::DW_EH_PE_sdata4: + D32(address); + break; + + case lul::DW_EH_PE_udata8: + case lul::DW_EH_PE_sdata8: + D64(address); + break; + + default: + abort(); + } + + return *this; +}; + +} // namespace lul_test diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.h b/tools/profiler/tests/gtest/LulTestInfrastructure.h new file mode 100644 index 0000000000..9faa7ca858 --- /dev/null +++ b/tools/profiler/tests/gtest/LulTestInfrastructure.h @@ -0,0 +1,736 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy + +// Derived from: +// cfi_assembler.h: Define CFISection, a class for creating properly +// (and improperly) formatted DWARF CFI data for unit tests. + +// Derived from: +// test-assembler.h: interface to class for building complex binary streams. 
+ +// To test the Breakpad symbol dumper and processor thoroughly, for +// all combinations of host system and minidump processor +// architecture, we need to be able to easily generate complex test +// data like debugging information and minidump files. +// +// For example, if we want our unit tests to provide full code +// coverage for stack walking, it may be difficult to persuade the +// compiler to generate every possible sort of stack walking +// information that we want to support; there are probably DWARF CFI +// opcodes that GCC never emits. Similarly, if we want to test our +// error handling, we will need to generate damaged minidumps or +// debugging information that (we hope) the client or compiler will +// never produce on its own. +// +// google_breakpad::TestAssembler provides a predictable and +// (relatively) simple way to generate complex formatted data streams +// like minidumps and CFI. Furthermore, because TestAssembler is +// portable, developers without access to (say) Visual Studio or a +// SPARC assembler can still work on test data for those targets. + +#ifndef LUL_TEST_INFRASTRUCTURE_H +#define LUL_TEST_INFRASTRUCTURE_H + +#include "LulDwarfExt.h" + +#include +#include + +using std::string; +using std::vector; + +namespace lul_test { +namespace test_assembler { + +// A Label represents a value not yet known that we need to store in a +// section. As long as all the labels a section refers to are defined +// by the time we retrieve its contents as bytes, we can use undefined +// labels freely in that section's construction. +// +// A label can be in one of three states: +// - undefined, +// - defined as the sum of some other label and a constant, or +// - a constant. +// +// A label's value never changes, but it can accumulate constraints. +// Adding labels and integers is permitted, and yields a label. +// Subtracting a constant from a label is permitted, and also yields a +// label. 
Subtracting two labels that have some relationship to each +// other is permitted, and yields a constant. +// +// For example: +// +// Label a; // a's value is undefined +// Label b; // b's value is undefined +// { +// Label c = a + 4; // okay, even though a's value is unknown +// b = c + 4; // also okay; b is now a+8 +// } +// Label d = b - 2; // okay; d == a+6, even though c is gone +// d.Value(); // error: d's value is not yet known +// d - a; // is 6, even though their values are not known +// a = 12; // now b == 20, and d == 18 +// d.Value(); // 18: no longer an error +// b.Value(); // 20 +// d = 10; // error: d is already defined. +// +// Label objects' lifetimes are unconstrained: notice that, in the +// above example, even though a and b are only related through c, and +// c goes out of scope, the assignment to a sets b's value as well. In +// particular, it's not necessary to ensure that a Label lives beyond +// Sections that refer to it. +class Label { + public: + Label(); // An undefined label. + explicit Label(uint64_t value); // A label with a fixed value + Label(const Label& value); // A label equal to another. + ~Label(); + + Label& operator=(uint64_t value); + Label& operator=(const Label& value); + Label operator+(uint64_t addend) const; + Label operator-(uint64_t subtrahend) const; + uint64_t operator-(const Label& subtrahend) const; + + // We could also provide == and != that work on undefined, but + // related, labels. + + // Return true if this label's value is known. If VALUE_P is given, + // set *VALUE_P to the known value if returning true. + bool IsKnownConstant(uint64_t* value_p = NULL) const; + + // Return true if the offset from LABEL to this label is known. If + // OFFSET_P is given, set *OFFSET_P to the offset when returning true. 
+ // + // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m', + // except that it also returns a value indicating whether the + // subtraction is possible given what we currently know of l and m. + // It can be possible even if we don't know l and m's values. For + // example: + // + // Label l, m; + // m = l + 10; + // l.IsKnownConstant(); // false + // m.IsKnownConstant(); // false + // uint64_t d; + // l.IsKnownOffsetFrom(m, &d); // true, and sets d to -10. + // l-m // -10 + // m-l // 10 + // m.Value() // error: m's value is not known + bool IsKnownOffsetFrom(const Label& label, uint64_t* offset_p = NULL) const; + + private: + // A label's value, or if that is not yet known, how the value is + // related to other labels' values. A binding may be: + // - a known constant, + // - constrained to be equal to some other binding plus a constant, or + // - unconstrained, and free to take on any value. + // + // Many labels may point to a single binding, and each binding may + // refer to another, so bindings and labels form trees whose leaves + // are labels, whose interior nodes (and roots) are bindings, and + // where links point from children to parents. Bindings are + // reference counted, allowing labels to be lightweight, copyable, + // assignable, placed in containers, and so on. + class Binding { + public: + Binding(); + explicit Binding(uint64_t addend); + ~Binding(); + + // Increment our reference count. + void Acquire() { reference_count_++; }; + // Decrement our reference count, and return true if it is zero. + bool Release() { return --reference_count_ == 0; } + + // Set this binding to be equal to BINDING + ADDEND. If BINDING is + // NULL, then set this binding to the known constant ADDEND. + // Update every binding on this binding's chain to point directly + // to BINDING, or to be a constant, with addends adjusted + // appropriately. + void Set(Binding* binding, uint64_t value); + + // Return what we know about the value of this binding. 
+ // - If this binding's value is a known constant, set BASE to + // NULL, and set ADDEND to its value. + // - If this binding is not a known constant but related to other + // bindings, set BASE to the binding at the end of the relation + // chain (which will always be unconstrained), and set ADDEND to the + // value to add to that binding's value to get this binding's + // value. + // - If this binding is unconstrained, set BASE to this, and set + // ADDEND to this binding's stored addend_. + void Get(Binding** base, uint64_t* addend); + + private: + // There are three cases: + // + // - A binding representing a known constant value has base_ NULL, + // and addend_ equal to the value. + // + // - A binding representing a completely unconstrained value has + // base_ pointing to this; addend_ is unused. + // + // - A binding whose value is related to some other binding's + // value has base_ pointing to that other binding, and addend_ + // set to the amount to add to that binding's value to get this + // binding's value. We only represent relationships of the form + // x = y+c. + // + // Thus, the base_ links form a chain terminating in either a + // known constant value or a completely unconstrained value. Most + // operations on bindings do path compression: they change every + // binding on the chain to point directly to the final value, + // adjusting addends as appropriate. + Binding* base_; + uint64_t addend_; + + // The number of Labels and Bindings pointing to this binding. + // (When a binding points to itself, indicating a completely + // unconstrained binding, that doesn't count as a reference.) + int reference_count_; + }; + + // This label's value. + Binding* value_; +}; + +// Conventions for representing larger numbers as sequences of bytes. +enum Endianness { + kBigEndian, // Big-endian: the most significant byte comes first. + kLittleEndian, // Little-endian: the least significant byte comes first. 
+ kUnsetEndian, // used internally +}; + +// A section is a sequence of bytes, constructed by appending bytes +// to the end. Sections have a convenient and flexible set of member +// functions for appending data in various formats: big-endian and +// little-endian signed and unsigned values of different sizes; +// LEB128 and ULEB128 values (see below), and raw blocks of bytes. +// +// If you need to append a value to a section that is not convenient +// to compute immediately, you can create a label, append the +// label's value to the section, and then set the label's value +// later, when it's convenient to do so. Once a label's value is +// known, the section class takes care of updating all previously +// appended references to it. +// +// Once all the labels to which a section refers have had their +// values determined, you can get a copy of the section's contents +// as a string. +// +// Note that there is no specified "start of section" label. This is +// because there are typically several different meanings for "the +// start of a section": the offset of the section within an object +// file, the address in memory at which the section's content appear, +// and so on. It's up to the code that uses the Section class to +// keep track of these explicitly, as they depend on the application. +class Section { + public: + explicit Section(Endianness endianness = kUnsetEndian) + : endianness_(endianness){}; + + // A base class destructor should be either public and virtual, + // or protected and nonvirtual. + virtual ~Section(){}; + + // Return the default endianness of this section. + Endianness endianness() const { return endianness_; } + + // Append the SIZE bytes at DATA to the end of this section. Return + // a reference to this section. + Section& Append(const string& data) { + contents_.append(data); + return *this; + }; + + // Append data from SLICE to the end of this section. Return + // a reference to this section. 
+ Section& Append(const lul::ImageSlice& slice) { + for (size_t i = 0; i < slice.length_; i++) { + contents_.append(1, slice.start_[i]); + } + return *this; + } + + // Append data from CSTRING to the end of this section. The terminating + // zero is not included. Return a reference to this section. + Section& Append(const char* cstring) { + for (size_t i = 0; cstring[i] != '\0'; i++) { + contents_.append(1, cstring[i]); + } + return *this; + } + + // Append SIZE copies of BYTE to the end of this section. Return a + // reference to this section. + Section& Append(size_t size, uint8_t byte) { + contents_.append(size, (char)byte); + return *this; + } + + // Append NUMBER to this section. ENDIANNESS is the endianness to + // use to write the number. SIZE is the length of the number in + // bytes. Return a reference to this section. + Section& Append(Endianness endianness, size_t size, uint64_t number); + Section& Append(Endianness endianness, size_t size, const Label& label); + + // Append SECTION to the end of this section. The labels SECTION + // refers to need not be defined yet. + // + // Note that this has no effect on any Labels' values, or on + // SECTION. If placing SECTION within 'this' provides new + // constraints on existing labels' values, then it's up to the + // caller to fiddle with those labels as needed. + Section& Append(const Section& section); + + // Append the contents of DATA as a series of bytes terminated by + // a NULL character. + Section& AppendCString(const string& data) { + Append(data); + contents_ += '\0'; + return *this; + } + + // Append VALUE or LABEL to this section, with the given bit width and + // endianness. Return a reference to this section. + // + // The names of these functions have the form <ENDIANNESS><BITWIDTH>: + // <ENDIANNESS> is either 'L' (little-endian, least significant byte first), + // 'B' (big-endian, most significant byte first), or + // 'D' (default, the section's default endianness) + // <BITWIDTH> is 8, 16, 32, or 64.
+ // + // Since endianness doesn't matter for a single byte, all the + // <BITWIDTH>=8 functions are equivalent. + // + // These can be used to write both signed and unsigned values, as + // the compiler will properly sign-extend a signed value before + // passing it to the function, at which point the function's + // behavior is the same either way. + Section& L8(uint8_t value) { + contents_ += value; + return *this; + } + Section& B8(uint8_t value) { + contents_ += value; + return *this; + } + Section& D8(uint8_t value) { + contents_ += value; + return *this; + } + Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), &B16(uint16_t), + &B32(uint32_t), &B64(uint64_t), &D16(uint16_t), &D32(uint32_t), + &D64(uint64_t); + Section &L8(const Label& label), &L16(const Label& label), + &L32(const Label& label), &L64(const Label& label), + &B8(const Label& label), &B16(const Label& label), + &B32(const Label& label), &B64(const Label& label), + &D8(const Label& label), &D16(const Label& label), + &D32(const Label& label), &D64(const Label& label); + + // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + // + // Note that VALUE cannot be a Label (we would have to implement + // relaxation). + Section& LEB128(long long value); + + // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
+ // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // Note that VALUE cannot be a Label (we would have to implement + // relaxation). + Section& ULEB128(uint64_t value); + + // Jump to the next location aligned on an ALIGNMENT-byte boundary, + // relative to the start of the section. Fill the gap with PAD_BYTE. + // ALIGNMENT must be a power of two. Return a reference to this + // section. + Section& Align(size_t alignment, uint8_t pad_byte = 0); + + // Return the current size of the section. + size_t Size() const { return contents_.size(); } + + // Return a label representing the start of the section. + // + // It is up to the user whether this label represents the section's + // position in an object file, the section's address in memory, or + // what have you; some applications may need both, in which case + // this simple-minded interface won't be enough. This class only + // provides a single start label, for use with the Here and Mark + // member functions. + // + // Ideally, we'd provide this in a subclass that actually knows more + // about the application at hand and can provide an appropriate + // collection of start labels. But then the appending member + // functions like Append and D32 would return a reference to the + // base class, not the derived class, and the chaining won't work. + // Since the only value here is in pretty notation, that's a fatal + // flaw. + Label start() const { return start_; } + + // Return a label representing the point at which the next Appended + // item will appear in the section, relative to start(). 
+ Label Here() const { return start_ + Size(); } + + // Set *LABEL to Here, and return a reference to this section. + Section& Mark(Label* label) { + *label = Here(); + return *this; + } + + // If there are no undefined label references left in this + // section, set CONTENTS to the contents of this section, as a + // string, and clear this section. Return true on success, or false + // if there were still undefined labels. + bool GetContents(string* contents); + + private: + // Used internally. A reference to a label's value. + struct Reference { + Reference(size_t set_offset, Endianness set_endianness, size_t set_size, + const Label& set_label) + : offset(set_offset), + endianness(set_endianness), + size(set_size), + label(set_label) {} + + // The offset of the reference within the section. + size_t offset; + + // The endianness of the reference. + Endianness endianness; + + // The size of the reference. + size_t size; + + // The label to which this is a reference. + Label label; + }; + + // The default endianness of this section. + Endianness endianness_; + + // The contents of the section. + string contents_; + + // References to labels within those contents. + vector references_; + + // A label referring to the beginning of the section. + Label start_; +}; + +} // namespace test_assembler +} // namespace lul_test + +namespace lul_test { + +using lul::DwarfPointerEncoding; +using lul_test::test_assembler::Endianness; +using lul_test::test_assembler::Label; +using lul_test::test_assembler::Section; + +class CFISection : public Section { + public: + // CFI augmentation strings beginning with 'z', defined by the + // Linux/IA-64 C++ ABI, can specify interesting encodings for + // addresses appearing in FDE headers and call frame instructions (and + // for additional fields whose presence the augmentation string + // specifies). 
In particular, pointers can be specified to be relative + // to various base addresses: the start of the .text section, the + // location holding the address itself, and so on. These allow the + // frame data to be position-independent even when they live in + // write-protected pages. These variants are specified at the + // following two URLs: + // + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // CFISection leaves the production of well-formed 'z'-augmented CIEs and + // FDEs to the user, but does provide EncodedPointer, to emit + // properly-encoded addresses for a given pointer encoding. + // EncodedPointer uses an instance of this structure to find the base + // addresses it should use; you can establish a default for all encoded + // pointers appended to this section with SetEncodedPointerBases. + struct EncodedPointerBases { + EncodedPointerBases() : cfi(), text(), data() {} + + // The starting address of this CFI section in memory, for + // DW_EH_PE_pcrel. DW_EH_PE_pcrel pointers may only be used in data + // that is loaded into the program's address space. + uint64_t cfi; + + // The starting address of this file's .text section, for DW_EH_PE_textrel. + uint64_t text; + + // The starting address of this file's .got or .eh_frame_hdr section, + // for DW_EH_PE_datarel. + uint64_t data; + }; + + // Create a CFISection whose endianness is ENDIANNESS, and where + // machine addresses are ADDRESS_SIZE bytes long. If EH_FRAME is + // true, use the .eh_frame format, as described by the Linux + // Standards Base Core Specification, instead of the DWARF CFI + // format.
+ CFISection(Endianness endianness, size_t address_size, bool eh_frame = false) + : Section(endianness), + address_size_(address_size), + eh_frame_(eh_frame), + pointer_encoding_(lul::DW_EH_PE_absptr), + encoded_pointer_bases_(), + entry_length_(NULL), + in_fde_(false) { + // The 'start', 'Here', and 'Mark' members of a CFISection all refer + // to section offsets. + start() = 0; + } + + // Return this CFISection's address size. + size_t AddressSize() const { return address_size_; } + + // Return true if this CFISection uses the .eh_frame format, or + // false if it contains ordinary DWARF CFI data. + bool ContainsEHFrame() const { return eh_frame_; } + + // Use ENCODING for pointers in calls to FDEHeader and EncodedPointer. + void SetPointerEncoding(DwarfPointerEncoding encoding) { + pointer_encoding_ = encoding; + } + + // Use the addresses in BASES as the base addresses for encoded + // pointers in subsequent calls to FDEHeader or EncodedPointer. + // This function makes a copy of BASES. + void SetEncodedPointerBases(const EncodedPointerBases& bases) { + encoded_pointer_bases_ = bases; + } + + // Append a Common Information Entry header to this section with the + // given values. If dwarf64 is true, use the 64-bit DWARF initial + // length format for the CIE's initial length. Return a reference to + // this section. You should call FinishEntry after writing the last + // instruction for the CIE. + // + // Before calling this function, you will typically want to use Mark + // or Here to make a label to pass to FDEHeader that refers to this + // CIE's position in the section. + CFISection& CIEHeader(uint64_t code_alignment_factor, + int data_alignment_factor, + unsigned return_address_register, uint8_t version = 3, + const string& augmentation = "", bool dwarf64 = false); + + // Append a Frame Description Entry header to this section with the + // given values. If dwarf64 is true, use the 64-bit DWARF initial + // length format for the FDE's initial length.
Return a reference to + // this section. You should call FinishEntry after writing the last + // instruction for the FDE. + // + // This function doesn't support entries that are longer than + // 0xffffff00 bytes. (The "initial length" is always a 32-bit + // value.) Nor does it support .debug_frame sections longer than + // 0xffffff00 bytes. + CFISection& FDEHeader(Label cie_pointer, uint64_t initial_location, + uint64_t address_range, bool dwarf64 = false); + + // Note the current position as the end of the last CIE or FDE we + // started, after padding with DW_CFA_nops for alignment. This + // defines the label representing the entry's length, cited in the + // entry's header. Return a reference to this section. + CFISection& FinishEntry(); + + // Append the contents of BLOCK as a DW_FORM_block value: an + // unsigned LEB128 length, followed by that many bytes of data. + CFISection& Block(const lul::ImageSlice& block) { + ULEB128(block.length_); + Append(block); + return *this; + } + + // Append data from CSTRING as a DW_FORM_block value: an unsigned LEB128 + // length, followed by that many bytes of data. The terminating zero is not + // included. + CFISection& Block(const char* cstring) { + ULEB128(strlen(cstring)); + Append(cstring); + return *this; + } + + // Append ADDRESS to this section, in the appropriate size and + // endianness. Return a reference to this section. + CFISection& Address(uint64_t address) { + Section::Append(endianness(), address_size_, address); + return *this; + } + + // Append ADDRESS to this section, using ENCODING and BASES. ENCODING + // defaults to this section's default encoding, established by + // SetPointerEncoding. BASES defaults to this section's bases, set by + // SetEncodedPointerBases. If the DW_EH_PE_indirect bit is set in the + // encoding, assume that ADDRESS is where the true address is stored. + // Return a reference to this section.
+ // + // (C++ doesn't let me use default arguments here, because I want to + // refer to members of *this in the default argument expression.) + CFISection& EncodedPointer(uint64_t address) { + return EncodedPointer(address, pointer_encoding_, encoded_pointer_bases_); + } + CFISection& EncodedPointer(uint64_t address, DwarfPointerEncoding encoding) { + return EncodedPointer(address, encoding, encoded_pointer_bases_); + } + CFISection& EncodedPointer(uint64_t address, DwarfPointerEncoding encoding, + const EncodedPointerBases& bases); + + // Restate some member functions, to keep chaining working nicely. + CFISection& Mark(Label* label) { + Section::Mark(label); + return *this; + } + CFISection& D8(uint8_t v) { + Section::D8(v); + return *this; + } + CFISection& D16(uint16_t v) { + Section::D16(v); + return *this; + } + CFISection& D16(Label v) { + Section::D16(v); + return *this; + } + CFISection& D32(uint32_t v) { + Section::D32(v); + return *this; + } + CFISection& D32(const Label& v) { + Section::D32(v); + return *this; + } + CFISection& D64(uint64_t v) { + Section::D64(v); + return *this; + } + CFISection& D64(const Label& v) { + Section::D64(v); + return *this; + } + CFISection& LEB128(long long v) { + Section::LEB128(v); + return *this; + } + CFISection& ULEB128(uint64_t v) { + Section::ULEB128(v); + return *this; + } + + private: + // A length value that we've appended to the section, but is not yet + // known. LENGTH is the appended value; START is a label referring + // to the start of the data whose length was cited. + struct PendingLength { + Label length; + Label start; + }; + + // Constants used in CFI/.eh_frame data: + + // If the first four bytes of an "initial length" are this constant, then + // the data uses the 64-bit DWARF format, and the length itself is the + // subsequent eight bytes. + static const uint32_t kDwarf64InitialLengthMarker = 0xffffffffU; + + // The CIE identifier for 32- and 64-bit DWARF CFI and .eh_frame data. 
+ static const uint32_t kDwarf32CIEIdentifier = ~(uint32_t)0; + static const uint64_t kDwarf64CIEIdentifier = ~(uint64_t)0; + static const uint32_t kEHFrame32CIEIdentifier = 0; + static const uint64_t kEHFrame64CIEIdentifier = 0; + + // The size of a machine address for the data in this section. + size_t address_size_; + + // If true, we are generating a Linux .eh_frame section, instead of + // a standard DWARF .debug_frame section. + bool eh_frame_; + + // The encoding to use for FDE pointers. + DwarfPointerEncoding pointer_encoding_; + + // The base addresses to use when emitting encoded pointers. + EncodedPointerBases encoded_pointer_bases_; + + // The length value for the current entry. + // + // Oddly, this must be dynamically allocated. Labels never get new + // values; they only acquire constraints on the value they already + // have, or assert if you assign them something incompatible. So + // each header needs truly fresh Label objects to cite in their + // headers and track their positions. The alternative is explicit + // destructor invocation and a placement new. Ick. + PendingLength* entry_length_; + + // True if we are currently emitting an FDE --- that is, we have + // called FDEHeader but have not yet called FinishEntry. + bool in_fde_; + + // If in_fde_ is true, this is its starting address. We use this for + // emitting DW_EH_PE_funcrel pointers. + uint64_t fde_start_address_; +}; + +} // namespace lul_test + +#endif // LUL_TEST_INFRASTRUCTURE_H diff --git a/tools/profiler/tests/gtest/ThreadProfileTest.cpp b/tools/profiler/tests/gtest/ThreadProfileTest.cpp new file mode 100644 index 0000000000..b8a15c39b2 --- /dev/null +++ b/tools/profiler/tests/gtest/ThreadProfileTest.cpp @@ -0,0 +1,60 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef MOZ_GECKO_PROFILER + +# include "ProfileBuffer.h" + +# include "mozilla/PowerOfTwo.h" +# include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h" +# include "mozilla/ProfileChunkedBuffer.h" + +# include "gtest/gtest.h" + +// Make sure we can record one entry and read it +TEST(ThreadProfile, InsertOneEntry) +{ + mozilla::ProfileBufferChunkManagerWithLocalLimit chunkManager( + 2 * (1 + uint32_t(sizeof(ProfileBufferEntry))) * 4, + 2 * (1 + uint32_t(sizeof(ProfileBufferEntry)))); + mozilla::ProfileChunkedBuffer profileChunkedBuffer( + mozilla::ProfileChunkedBuffer::ThreadSafety::WithMutex, chunkManager); + auto pb = mozilla::MakeUnique(profileChunkedBuffer); + pb->AddEntry(ProfileBufferEntry::Time(123.1)); + ProfileBufferEntry entry = pb->GetEntry(pb->BufferRangeStart()); + ASSERT_TRUE(entry.IsTime()); + ASSERT_EQ(123.1, entry.GetDouble()); +} + +// See if we can insert some entries +TEST(ThreadProfile, InsertEntriesNoWrap) +{ + mozilla::ProfileBufferChunkManagerWithLocalLimit chunkManager( + 100 * (1 + uint32_t(sizeof(ProfileBufferEntry))), + 100 * (1 + uint32_t(sizeof(ProfileBufferEntry))) / 4); + mozilla::ProfileChunkedBuffer profileChunkedBuffer( + mozilla::ProfileChunkedBuffer::ThreadSafety::WithMutex, chunkManager); + auto pb = mozilla::MakeUnique(profileChunkedBuffer); + const int test_size = 50; + for (int i = 0; i < test_size; i++) { + pb->AddEntry(ProfileBufferEntry::Time(i)); + } + int times = 0; + uint64_t readPos = pb->BufferRangeStart(); + while (readPos != pb->BufferRangeEnd()) { + ProfileBufferEntry entry = pb->GetEntry(readPos); + readPos++; + if (entry.GetKind() == ProfileBufferEntry::Kind::INVALID) { + continue; + } + ASSERT_TRUE(entry.IsTime()); + ASSERT_EQ(times, entry.GetDouble()); + times++; + } + ASSERT_EQ(test_size, times); +} + +#endif // MOZ_GECKO_PROFILER diff --git a/tools/profiler/tests/gtest/moz.build 
b/tools/profiler/tests/gtest/moz.build new file mode 100644 index 0000000000..4eb1fef762 --- /dev/null +++ b/tools/profiler/tests/gtest/moz.build @@ -0,0 +1,45 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at http://mozilla.org/MPL/2.0/. + +if ( + CONFIG["MOZ_GECKO_PROFILER"] + and CONFIG["OS_TARGET"] in ("Android", "Linux") + and CONFIG["CPU_ARCH"] + in ( + "arm", + "aarch64", + "x86", + "x86_64", + ) +): + UNIFIED_SOURCES += [ + "LulTest.cpp", + "LulTestDwarf.cpp", + "LulTestInfrastructure.cpp", + ] + +LOCAL_INCLUDES += [ + "/netwerk/base", + "/netwerk/protocol/http", + "/toolkit/components/jsoncpp/include", + "/tools/profiler/core", + "/tools/profiler/gecko", + "/tools/profiler/lul", +] + +if CONFIG["OS_TARGET"] != "Android": + UNIFIED_SOURCES += [ + "GeckoProfiler.cpp", + "ThreadProfileTest.cpp", + ] + +USE_LIBS += [ + "jsoncpp", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul-gtest" diff --git a/tools/profiler/tests/shared-head.js b/tools/profiler/tests/shared-head.js new file mode 100644 index 0000000000..d1b2f6868a --- /dev/null +++ b/tools/profiler/tests/shared-head.js @@ -0,0 +1,591 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* globals Assert */ +/* globals info */ + +/** + * This file contains utilities that can be shared between xpcshell tests and mochitests. + */ + +// The marker phases. +const INSTANT = 0; +const INTERVAL = 1; +const INTERVAL_START = 2; +const INTERVAL_END = 3; + +// This Services declaration may shadow another from head.js, so define it as +// a var rather than a const. 
+ +const defaultSettings = { + entries: 8 * 1024 * 1024, // 8M entries = 64MB + interval: 1, // ms + features: [], + threads: ["GeckoMain"], +}; + +// Effectively `async`: Start the profiler and return the `startProfiler` +// promise that will get resolved when all child processes have started their own +// profiler. +async function startProfiler(callersSettings) { + if (Services.profiler.IsActive()) { + Assert.ok( + Services.env.exists("MOZ_PROFILER_STARTUP"), + "The profiler is active at the begining of the test, " + + "the MOZ_PROFILER_STARTUP environment variable should be set." + ); + if (Services.env.exists("MOZ_PROFILER_STARTUP")) { + // If the startup profiling environment variable exists, it is likely + // that tests are being profiled. + // Stop the profiler before starting profiler tests. + info( + "This test starts and stops the profiler and is not compatible " + + "with the use of MOZ_PROFILER_STARTUP. " + + "Stopping the profiler before starting the test." + ); + await Services.profiler.StopProfiler(); + } else { + throw new Error( + "The profiler must not be active before starting it in a test." + ); + } + } + const settings = Object.assign({}, defaultSettings, callersSettings); + return Services.profiler.StartProfiler( + settings.entries, + settings.interval, + settings.features, + settings.threads, + 0, + settings.duration + ); +} + +function startProfilerForMarkerTests() { + return startProfiler({ + features: ["nostacksampling", "js"], + threads: ["GeckoMain", "DOM Worker"], + }); +} + +/** + * This is a helper function to be able to run `await wait(500)`. Unfortunately + * this is needed as the act of collecting functions relies on the periodic + * sampling of the threads.
See: + * https://bugzilla.mozilla.org/show_bug.cgi?id=1529053 + * + * @param {number} time + * @returns {Promise} + */ +function wait(time) { + return new Promise(resolve => { + // eslint-disable-next-line mozilla/no-arbitrary-setTimeout + setTimeout(resolve, time); + }); +} + +/** + * Get the payloads of a type recursively, including from all subprocesses. + * + * @param {Object} profile The gecko profile. + * @param {string} type The marker payload type, e.g. "DiskIO". + * @param {Array} payloadTarget The recursive list of payloads. + * @return {Array} The final payloads. + */ +function getPayloadsOfTypeFromAllThreads(profile, type, payloadTarget = []) { + for (const { markers } of profile.threads) { + for (const markerTuple of markers.data) { + const payload = markerTuple[markers.schema.data]; + if (payload && payload.type === type) { + payloadTarget.push(payload); + } + } + } + + for (const subProcess of profile.processes) { + getPayloadsOfTypeFromAllThreads(subProcess, type, payloadTarget); + } + + return payloadTarget; +} + +/** + * Get the payloads of a type from a single thread. + * + * @param {Object} thread The thread from a profile. + * @param {string} type The marker payload type, e.g. "DiskIO". + * @return {Array} The payloads. + */ +function getPayloadsOfType(thread, type) { + const { markers } = thread; + const results = []; + for (const markerTuple of markers.data) { + const payload = markerTuple[markers.schema.data]; + if (payload && payload.type === type) { + results.push(payload); + } + } + return results; +} + +/** + * Applies the marker schema to create individual objects for each marker + * + * @param {Object} thread The thread from a profile. + * @return {InflatedMarker[]} The markers. 
+ */ +function getInflatedMarkerData(thread) { + const { markers, stringTable } = thread; + return markers.data.map(markerTuple => { + const marker = {}; + for (const [key, tupleIndex] of Object.entries(markers.schema)) { + marker[key] = markerTuple[tupleIndex]; + if (key === "name") { + // Use the string from the string table. + marker[key] = stringTable[marker[key]]; + } + } + return marker; + }); +} + +/** + * Applies the marker schema to create individual objects for each marker, then + * keeps only the network markers that match the profiler tests. + * + * @param {Object} thread The thread from a profile. + * @return {InflatedMarker[]} The filtered network markers. + */ +function getInflatedNetworkMarkers(thread) { + const markers = getInflatedMarkerData(thread); + return markers.filter( + m => + m.data && + m.data.type === "Network" && + // We filter out network markers that aren't related to the test, to + // avoid intermittents. + m.data.URI.includes("/tools/profiler/") + ); +} + +/** + * From a list of network markers, this returns pairs of start/stop markers. + * If a stop marker can't be found for a start marker, this will return an array + * of only 1 element. + * + * @param {InflatedMarker[]} networkMarkers Network markers + * @return {InflatedMarker[][]} Pairs of network markers + */ +function getPairsOfNetworkMarkers(allNetworkMarkers) { + // For each 'start' marker we want to find the next 'stop' or 'redirect' + // marker with the same id. 
+ const result = []; + const mapOfStartMarkers = new Map(); // marker id -> id in result array + for (const marker of allNetworkMarkers) { + const { data } = marker; + if (data.status === "STATUS_START") { + if (mapOfStartMarkers.has(data.id)) { + const previousMarker = result[mapOfStartMarkers.get(data.id)][0]; + Assert.ok( + false, + `We found 2 start markers with the same id ${data.id}, without end marker in-between.` + + `The first marker has URI ${previousMarker.data.URI}, the second marker has URI ${data.URI}.` + + ` This should not happen.` + ); + continue; + } + + mapOfStartMarkers.set(data.id, result.length); + result.push([marker]); + } else { + // STOP or REDIRECT + if (!mapOfStartMarkers.has(data.id)) { + Assert.ok( + false, + `We found an end marker without a start marker (id: ${data.id}, URI: ${data.URI}). This should not happen.` + ); + continue; + } + result[mapOfStartMarkers.get(data.id)].push(marker); + mapOfStartMarkers.delete(data.id); + } + } + + return result; +} + +/** + * It can be helpful to force the profiler to collect a JavaScript sample. This + * function spins on a while loop until at least one more sample is collected. + * + * @return {number} The index of the collected sample. + */ +function captureAtLeastOneJsSample() { + function getProfileSampleCount() { + const profile = Services.profiler.getProfileData(); + return profile.threads[0].samples.data.length; + } + + const sampleCount = getProfileSampleCount(); + // Create an infinite loop until a sample has been collected. 
+ while (true) { + if (sampleCount < getProfileSampleCount()) { + return sampleCount; + } + } +} + +function isJSONWhitespace(c) { + return ["\n", "\r", " ", "\t"].includes(c); +} + +function verifyJSONStringIsCompact(s) { + const stateData = 0; + const stateString = 1; + const stateEscapedChar = 2; + let state = stateData; + for (let i = 0; i < s.length; ++i) { + let c = s[i]; + switch (state) { + case stateData: + if (isJSONWhitespace(c)) { + Assert.ok( + false, + `"Unexpected JSON whitespace at index ${i} in profile: <<<${s}>>>"` + ); + return; + } + if (c == '"') { + state = stateString; + } + break; + case stateString: + if (c == '"') { + state = stateData; + } else if (c == "\\") { + state = stateEscapedChar; + } + break; + case stateEscapedChar: + state = stateString; + break; + } + } +} + +/** + * This function pauses the profiler before getting the profile. Then after + * getting the data, the profiler is stopped, and all profiler data is removed. + * @returns {Promise} + */ +async function stopNowAndGetProfile() { + // Don't await the pause, because each process will handle it before it + // receives the following `getProfileDataAsArrayBuffer()`. + Services.profiler.Pause(); + + const profileArrayBuffer = + await Services.profiler.getProfileDataAsArrayBuffer(); + await Services.profiler.StopProfiler(); + + const profileUint8Array = new Uint8Array(profileArrayBuffer); + const textDecoder = new TextDecoder("utf-8", { fatal: true }); + const profileString = textDecoder.decode(profileUint8Array); + verifyJSONStringIsCompact(profileString); + + return JSON.parse(profileString); +} + +/** + * This function ensures there's at least one sample, then pauses the profiler + * before getting the profile. Then after getting the data, the profiler is + * stopped, and all profiler data is removed. 
+ * @returns {Promise} + */ +async function waitSamplingAndStopAndGetProfile() { + await Services.profiler.waitOnePeriodicSampling(); + return stopNowAndGetProfile(); +} + +/** + * Verifies that a marker is an interval marker. + * + * @param {InflatedMarker} marker + * @returns {boolean} + */ +function isIntervalMarker(inflatedMarker) { + return ( + inflatedMarker.phase === 1 && + typeof inflatedMarker.startTime === "number" && + typeof inflatedMarker.endTime === "number" + ); +} + +/** + * @param {Profile} profile + * @returns {Thread[]} + */ +function getThreads(profile) { + const threads = []; + + function getThreadsRecursive(process) { + for (const thread of process.threads) { + threads.push(thread); + } + for (const subprocess of process.processes) { + getThreadsRecursive(subprocess); + } + } + + getThreadsRecursive(profile); + return threads; +} + +/** + * Find a specific marker schema from any process of a profile. + * + * @param {Profile} profile + * @param {string} name + * @returns {MarkerSchema} + */ +function getSchema(profile, name) { + { + const schema = profile.meta.markerSchema.find(s => s.name === name); + if (schema) { + return schema; + } + } + for (const subprocess of profile.processes) { + const schema = subprocess.meta.markerSchema.find(s => s.name === name); + if (schema) { + return schema; + } + } + console.error("Parent process schema", profile.meta.markerSchema); + for (const subprocess of profile.processes) { + console.error("Child process schema", subprocess.meta.markerSchema); + } + throw new Error(`Could not find a schema for "${name}".`); +} + +/** + * This escapes all characters that have a special meaning in RegExps. + * This was stolen from https://github.com/sindresorhus/escape-string-regexp and + * so it is licence MIT and: + * Copyright (c) Sindre Sorhus (https://sindresorhus.com). + * See the full license in https://raw.githubusercontent.com/sindresorhus/escape-string-regexp/main/license. 
+ * @param {string} string The string to be escaped
+ * @returns {string} The result
+ */
+function escapeStringRegexp(string) {
+  if (typeof string !== "string") {
+    throw new TypeError("Expected a string");
+  }
+
+  // Escape characters with special meaning either inside or outside character
+  // sets. Use a simple backslash escape when it’s always valid, and a `\xnn`
+  // escape when the simpler form would be disallowed by Unicode patterns’
+  // stricter grammar.
+  return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
+}
+
+/** ------ Assertions helper ------ */
+/**
+ * This assert helper function makes it easy to check a lot of properties in an
+ * object. We augment Assert.sys.mjs to make it easier to use.
+ */
+Object.assign(Assert, {
+  /*
+   * It checks if the properties on the right are all present in the object on
+   * the left. Note that the object might still have other properties (see
+   * objectContainsOnly below if you want the stricter form).
+   *
+   * The basic form does basic equality on each expected property:
+   *
+   * Assert.objectContains(fixture, {
+   *   foo: "foo",
+   *   bar: 1,
+   *   baz: true,
+   * });
+   *
+   * But it also has a more powerful form with expectations. The available
+   * expectations are:
+   * - any(): this only checks for the existence of the property, not its value
+   * - number(), string(), boolean(), bigint(), function(), symbol(), object():
+   *   this checks if the value is of this type
+   * - objectContains(expected): this applies Assert.objectContains()
+   *   recursively on this property.
+   * - stringContains(needle): this checks if the expected value is included in
+   *   the property value.
+   * - stringMatches(regexp): this checks if the property value matches this
+   *   regexp. The regexp can be passed as a string, to be dynamically built.
+   *
+   * example:
+   *
+   * Assert.objectContains(fixture, {
+   *   name: Expect.stringMatches(`Load \\d+:.*${url}`),
+   *   data: Expect.objectContains({
+   *     status: "STATUS_STOP",
+   *     URI: Expect.stringContains("https://"),
+   *     requestMethod: "GET",
+   *     contentType: Expect.string(),
+   *     startTime: Expect.number(),
+   *     cached: Expect.boolean(),
+   *   }),
+   * });
+   *
+   * Each expectation will translate into one or more Assert call. Therefore if
+   * one expectation fails, this will be clearly visible in the test output.
+   *
+   * Expectations can also be normal functions, for example:
+   *
+   * Assert.objectContains(fixture, {
+   *   number: value => Assert.greater(value, 5)
+   * });
+   *
+   * Note that you'll need to use Assert inside this function.
+   *
+   * Both parameters must be non-null objects; anything else is reported as a
+   * single failed assertion and no further check is run.
+   *
+   * @param {Object} object - The object under test.
+   * @param {Object} expectedProperties - Maps property names to either an
+   *   expected value or an expectation function called with
+   *   (actualValue, message).
+   */
+  objectContains(object, expectedProperties) {
+    // Basic tests: we don't want to run other assertions if these tests fail.
+    // `typeof null` is "object", so null has to be rejected explicitly;
+    // otherwise the `in` operator below would throw a TypeError.
+    if (typeof object !== "object" || object === null) {
+      this.ok(
+        false,
+        `The first parameter should be an object, but found: ${object}.`
+      );
+      return;
+    }
+
+    // Same reasoning: Object.keys(null) would throw a TypeError.
+    if (typeof expectedProperties !== "object" || expectedProperties === null) {
+      this.ok(
+        false,
+        `The second parameter should be an object, but found: ${expectedProperties}.`
+      );
+      return;
+    }
+
+    for (const key of Object.keys(expectedProperties)) {
+      const expected = expectedProperties[key];
+      if (!(key in object)) {
+        this.report(
+          true,
+          object,
+          expectedProperties,
+          `The object should contain the property "${key}", but it's missing.`
+        );
+        continue;
+      }
+
+      if (typeof expected === "function") {
+        // This is a function, so let's call it.
+        expected(
+          object[key],
+          `The object should contain the property "${key}" with an expected value and type.`
+        );
+      } else {
+        // Otherwise, we check for equality.
+        this.equal(
+          object[key],
+          expectedProperties[key],
+          `The object should contain the property "${key}" with an expected value.`
+        );
+      }
+    }
+  },
+
+  /**
+   * This is very similar to the previous `objectContains`, but this also looks
+   * at the number of the objects' properties. Thus this will fail if the
+   * objects don't have the same properties exactly.
+   *
+   * Both parameters must be non-null objects; anything else is reported as a
+   * single failed assertion and no further check is run.
+   *
+   * @param {Object} object - The object under test.
+   * @param {Object} expectedProperties - Same format as for `objectContains`.
+   */
+  objectContainsOnly(object, expectedProperties) {
+    // Basic tests: we don't want to run other assertions if these tests fail.
+    // `typeof null` is "object", so null has to be rejected explicitly;
+    // otherwise Object.keys(object) below would throw a TypeError.
+    if (typeof object !== "object" || object === null) {
+      this.ok(
+        false,
+        `The first parameter should be an object but found: ${object}.`
+      );
+      return;
+    }
+
+    // Same reasoning: the `in` operator below needs a real object.
+    if (typeof expectedProperties !== "object" || expectedProperties === null) {
+      this.ok(
+        false,
+        `The second parameter should be an object but found: ${expectedProperties}.`
+      );
+      return;
+    }
+
+    // In objectContainsOnly, we specifically want to check if all properties
+    // from the fixture object are expected.
+    // We'll be failing a test only for the specific properties that weren't
+    // expected, and only fail with one message, so that the test outputs aren't
+    // spammed.
+    const extraProperties = [];
+    for (const fixtureKey of Object.keys(object)) {
+      if (!(fixtureKey in expectedProperties)) {
+        extraProperties.push(fixtureKey);
+      }
+    }
+
+    if (extraProperties.length) {
+      // Some extra properties have been found.
+      this.report(
+        true,
+        object,
+        expectedProperties,
+        `These properties are present, but shouldn't: "${extraProperties.join(
+          '", "'
+        )}".`
+      );
+    }
+
+    // Now, let's carry on the rest of our work.
+    this.objectContains(object, expectedProperties);
+  },
+});
+
+const Expect = {
+  any:
+    () =>
+    actual => {} /* We don't check anything more than the presence of this property. */,
+};
+
+/* These functions are part of the Assert object, and we want to reuse them. */
+[
+  "stringContains",
+  "stringMatches",
+  "objectContains",
+  "objectContainsOnly",
+].forEach(
+  assertChecker =>
+    (Expect[assertChecker] =
+      expected =>
+      (actual, ...moreArgs) =>
+        Assert[assertChecker](actual, expected, ...moreArgs))
+);
+
+/* These functions will only check for the type.
*/
+// Register one Expect.<type>() factory per `typeof` result listed here.
+// makeTypeChecker is a function declaration, so it is hoisted and can be used
+// before its definition below.
+[
+  "number",
+  "string",
+  "boolean",
+  "bigint",
+  "symbol",
+  "object",
+  "function",
+].forEach(type => (Expect[type] = makeTypeChecker(type)));
+
+/**
+ * Builds the expectation factory for one `typeof` result.
+ *
+ * The returned factory takes no argument (it throws if given any) and returns
+ * the actual checker, which reports through Assert.report whether
+ * `typeof actual` equals `type`.
+ *
+ * @param {string} type - The expected result of `typeof`.
+ * @returns {Function} A zero-argument factory returning a
+ *   (actual, message) checker.
+ */
+function makeTypeChecker(type) {
+  return (...unexpectedArgs) => {
+    // Catches misuse such as Expect.number(5): type checkers take no argument.
+    if (unexpectedArgs.length) {
+      throw new Error(
+        "Type checkers expectations aren't expecting any argument."
+      );
+    }
+    return (actual, message) => {
+      const isCorrect = typeof actual === type;
+      // The first argument of Assert.report is the "failed" flag, hence the
+      // negation.
+      Assert.report(!isCorrect, actual, type, message, "has type");
+    };
+  };
+}
+/* ------ End of assertion helper ------ */
diff --git a/tools/profiler/tests/xpcshell/head.js b/tools/profiler/tests/xpcshell/head.js
new file mode 100644
index 0000000000..ce87b32fd5
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/head.js
@@ -0,0 +1,244 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* import-globals-from ../shared-head.js */
+
+// This Services declaration may shadow another from head.js, so define it as
+// a var rather than a const.
+// NOTE(review): no `Services` declaration follows this comment in this file;
+// the comment looks stale. Confirm before removing it.
+
+const { AppConstants } = ChromeUtils.importESModule(
+  "resource://gre/modules/AppConstants.sys.mjs"
+);
+const { setTimeout } = ChromeUtils.importESModule(
+  "resource://gre/modules/Timer.sys.mjs"
+);
+
+// Load the shared head
+const sharedHead = do_get_file("shared-head.js", false);
+if (!sharedHead) {
+  throw new Error("Could not load the shared head.");
+}
+// The script is loaded with `this` as its target object, so the shared helpers
+// are defined on the same scope as the rest of this file.
+Services.scriptloader.loadSubScript(
+  Services.io.newFileURI(sharedHead).spec,
+  this
+);
+
+/**
+ * This function takes a thread, and a sample tuple from the "data" array, and
+ * inflates the frame to be an array of strings.
+ *
+ * @param {Object} thread - The thread from the profile.
+ * @param {Array} sample - The tuple from the thread.samples.data array.
+ * @returns {Array} An array of function names.
+ */
+function getInflatedStackLocations(thread, sample) {
+  let stackTable = thread.stackTable;
+  let frameTable = thread.frameTable;
+  let stringTable = thread.stringTable;
+  // Column indexes, looked up from each table's schema.
+  let SAMPLE_STACK_SLOT = thread.samples.schema.stack;
+  let STACK_PREFIX_SLOT = stackTable.schema.prefix;
+  let STACK_FRAME_SLOT = stackTable.schema.frame;
+  let FRAME_LOCATION_SLOT = frameTable.schema.location;
+
+  // Build the stack from the raw data and accumulate the locations in
+  // an array. Each stack entry points to its parent through the "prefix"
+  // column; a null prefix ends the walk.
+  let stackIndex = sample[SAMPLE_STACK_SLOT];
+  let locations = [];
+  while (stackIndex !== null) {
+    let stackEntry = stackTable.data[stackIndex];
+    let frame = frameTable.data[stackEntry[STACK_FRAME_SLOT]];
+    locations.push(stringTable[frame[FRAME_LOCATION_SLOT]]);
+    stackIndex = stackEntry[STACK_PREFIX_SLOT];
+  }
+
+  // The profiler tree is inverted, so reverse the array.
+  return locations.reverse();
+}
+
+/**
+ * This utility matches up stacks to see if they contain a certain sequence of
+ * stack frames. A correctly functioning profiler will have a certain sequence
+ * of stacks, but we can't always determine exactly which stacks will show up
+ * due to implementation changes, as well as memory addresses being arbitrary to
+ * that particular build.
+ *
+ * The expected frames must appear in the actual stack in the same order, but
+ * not necessarily next to each other. A string expectation must equal the
+ * actual frame exactly; any other expectation is passed to String.match().
+ *
+ * This function triggers a test failure with a nice debug message when it
+ * fails.
+ *
+ * @param {Array} actualStackFrames - An array of strings, e.g. as returned by
+ *   getInflatedStackLocations.
+ * @param {Array} expectedStackFrames - Matches a subset of
+ *   actualStackFrames
+ * @param {string} [message] - The message attached to the assertion.
+ */
+function expectStackToContain(
+  actualStackFrames,
+  expectedStackFrames,
+  message = "The actual stack and expected stack do not match."
+) {
+  // Log the stacks that are being passed to this assertion, as it could be
+  // useful for when these tests fail.
+  console.log("Actual stack: ", actualStackFrames);
+  console.log(
+    "Expected to contain: ",
+    expectedStackFrames.map(s => s.toString())
+  );
+
+  let actualIndex = 0;
+
+  // Start walking the expected stack and look for matches.
+  for (const expectedStackFrame of expectedStackFrames) {
+    while (true) {
+      // Make sure that we haven't run out of actual stack frames.
+      if (actualIndex >= actualStackFrames.length) {
+        info(`Could not find a match for: "${expectedStackFrame.toString()}"`);
+        Assert.ok(false, message);
+        // A failed assertion is not guaranteed to throw (that depends on the
+        // reporter installed on Assert). Stop here: continuing would read
+        // past the end of the array, and then either loop forever or call
+        // match() on undefined.
+        return;
+      }
+
+      const actualStackFrame = actualStackFrames[actualIndex];
+      actualIndex++;
+
+      const itMatches =
+        typeof expectedStackFrame === "string"
+          ? expectedStackFrame === actualStackFrame
+          : actualStackFrame.match(expectedStackFrame);
+
+      if (itMatches) {
+        // We found a match, break out of this loop.
+        break;
+      }
+      // Keep on looping looking for a match.
+    }
+  }
+
+  Assert.ok(true, message);
+}
+
+/**
+ * Returns the inflated markers of a thread whose payload type is "FileIO" and
+ * whose payload filename ends with the given filename.
+ *
+ * @param {Thread} thread
+ * @param {string} filename - The filename used to trigger FileIO.
+ * @returns {InflatedMarkers[]}
+ */
+function getInflatedFileIOMarkers(thread, filename) {
+  const markers = getInflatedMarkerData(thread);
+  return markers.filter(
+    marker =>
+      marker.data?.type === "FileIO" &&
+      marker.data?.filename?.endsWith(filename)
+  );
+}
+
+/**
+ * Checks properties common to all FileIO markers.
+ *
+ * @param {InflatedMarkers[]} markers
+ * @param {string} filename
+ */
+function checkInflatedFileIOMarkers(markers, filename) {
+  greater(markers.length, 0, "Found some markers");
+
+  // See IOInterposeObserver::Observation::ObservedOperationString
+  const validOperations = new Set([
+    "write",
+    "fsync",
+    "close",
+    "stat",
+    "create/open",
+    "read",
+  ]);
+  const validSources = new Set(["PoisonIOInterposer", "NSPRIOInterposer"]);
+
+  for (const marker of markers) {
+    try {
+      ok(
+        marker.name.startsWith("FileIO"),
+        "Has a marker.name that starts with FileIO"
+      );
+      equal(marker.data.type, "FileIO", "Has a marker.data.type");
+      ok(isIntervalMarker(marker), "All FileIO markers are interval markers");
+      ok(
+        validOperations.has(marker.data.operation),
+        `The markers have a known operation - "${marker.data.operation}"`
+      );
+      ok(
+        validSources.has(marker.data.source),
+        `The FileIO marker has a known source "${marker.data.source}"`
+      );
+      ok(
+        marker.data.filename.endsWith(filename),
+        `The FileIO marker filename ends with "${filename}"`
+      );
+      ok(Boolean(marker.data.stack), "A stack was collected");
+    } catch (error) {
+      // Log the offending marker before re-throwing, so that the failure can
+      // be tied to concrete marker data.
+      console.error("Failing inflated FileIO marker:", marker);
+      throw error;
+    }
+  }
+}
+
+/**
+ * Do deep equality checks for schema, but then surface nice errors for a user to know
+ * what to do if the check fails.
+ *
+ * @param {Object} actual - The schema found in the profile.
+ * @param {Object} expected - The schema the test expects; its `name` is used
+ *   in the messages.
+ */
+function checkSchema(actual, expected) {
+  const schemaName = expected.name;
+  info(`Checking marker schema for "${schemaName}"`);
+
+  try {
+    ok(
+      actual,
+      `Schema was found for "${schemaName}". See the test output for more information.`
+    );
+    // Check individual properties to surface easier to debug errors.
+    deepEqual(
+      expected.display,
+      actual.display,
+      `The "display" property for ${schemaName} schema matches. See the test output for more information.`
+    );
+    if (expected.data) {
+      ok(actual.data, `Schema data was found for "${schemaName}"`);
+      for (const expectedDatum of expected.data) {
+        const actualDatum = actual.data.find(d => d.key === expectedDatum.key);
+        deepEqual(
+          expectedDatum,
+          actualDatum,
+          `The "${schemaName}" field "${expectedDatum.key}" matches expectations. See the test output for more information.`
+        );
+      }
+      equal(
+        expected.data.length,
+        actual.data.length,
+        "The expected and actual data have the same number of items"
+      );
+    }
+
+    // Finally do a true deep equal.
+    deepEqual(expected, actual, "The entire schema is deepEqual");
+  } catch (error) {
+    // The test results are not very human readable. This is a bit of a hacky
+    // solution to make it more readable.
+    dump("-----------------------------------------------------\n");
+    dump("The expected marker schema:\n");
+    dump("-----------------------------------------------------\n");
+    dump(JSON.stringify(expected, null, 2));
+    dump("\n");
+    dump("-----------------------------------------------------\n");
+    dump("The actual marker schema:\n");
+    dump("-----------------------------------------------------\n");
+    dump(JSON.stringify(actual, null, 2));
+    dump("\n");
+    dump("-----------------------------------------------------\n");
+    dump("A marker schema was not equal to expectations. If you\n");
+    dump("are modifying the schema, then please copy and paste\n");
+    dump("the new schema into this test.\n");
+    dump("-----------------------------------------------------\n");
+    dump("Copy this: " + JSON.stringify(actual));
+    dump("\n");
+    dump("-----------------------------------------------------\n");
+
+    throw error;
+  }
+}
diff --git a/tools/profiler/tests/xpcshell/test_active_configuration.js b/tools/profiler/tests/xpcshell/test_active_configuration.js
new file mode 100644
index 0000000000..c4336f3f32
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_active_configuration.js
@@ -0,0 +1,115 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async () => {
+  info(
+    "Checking that the profiler can fetch the information about the active " +
+      "configuration that is being used to power the profiler."
+  );
+
+  equal(
+    Services.profiler.activeConfiguration,
+    null,
+    "When the profile is off, there is no active configuration."
+  );
+
+  {
+    info("Start the profiler.");
+    const entries = 10000;
+    const interval = 1;
+    const threads = ["GeckoMain"];
+    const features = ["js"];
+    const activeTabID = 123;
+    await Services.profiler.StartProfiler(
+      entries,
+      interval,
+      features,
+      threads,
+      activeTabID
+    );
+
+    info("Generate the activeConfiguration.");
+    const { activeConfiguration } = Services.profiler;
+    const expectedConfiguration = {
+      interval,
+      threads,
+      features,
+      activeTabID,
+      // The buffer is created as a power of two that can fit all of the entries
+      // into it. If the ratio of entries to buffer size ever changes, this setting
+      // will need to be updated.
+      capacity: Math.pow(2, 14),
+    };
+
+    deepEqual(
+      activeConfiguration,
+      expectedConfiguration,
+      "The active configuration matches configuration given."
+    );
+
+    info("Get the profile.");
+    const profile = Services.profiler.getProfileData();
+    deepEqual(
+      profile.meta.configuration,
+      expectedConfiguration,
+      "The configuration also matches on the profile meta object."
+    );
+  }
+
+  {
+    const entries = 20000;
+    const interval = 0.5;
+    const threads = ["GeckoMain", "DOM Worker"];
+    const features = [];
+    const activeTabID = 111;
+    const duration = 20;
+
+    info("Restart the profiler with a new configuration.");
+    await Services.profiler.StartProfiler(
+      entries,
+      interval,
+      features,
+      threads,
+      activeTabID,
+      // Also start it with duration, this property is optional.
+      duration
+    );
+
+    info("Generate the activeConfiguration.");
+    const { activeConfiguration } = Services.profiler;
+    const expectedConfiguration = {
+      interval,
+      threads,
+      features,
+      activeTabID,
+      duration,
+      // The buffer is created as a power of two that can fit all of the entries
+      // into it. If the ratio of entries to buffer size ever changes, this setting
+      // will need to be updated.
+      capacity: Math.pow(2, 15),
+    };
+
+    deepEqual(
+      activeConfiguration,
+      expectedConfiguration,
+      "The active configuration matches the new configuration."
+    );
+
+    info("Get the profile.");
+    const profile = Services.profiler.getProfileData();
+    deepEqual(
+      profile.meta.configuration,
+      expectedConfiguration,
+      "The configuration also matches on the profile meta object."
+    );
+  }
+
+  await Services.profiler.StopProfiler();
+
+  equal(
+    Services.profiler.activeConfiguration,
+    null,
+    "When the profile is off, there is no active configuration."
+  );
+});
diff --git a/tools/profiler/tests/xpcshell/test_addProfilerMarker.js b/tools/profiler/tests/xpcshell/test_addProfilerMarker.js
new file mode 100644
index 0000000000..b11545a41c
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_addProfilerMarker.js
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that ChromeUtils.addProfilerMarker is working correctly. + */ + +const markerNamePrefix = "test_addProfilerMarker"; +const markerText = "Text payload"; +// The same startTime will be used for all markers with a duration, +// and we store this value globally so that expectDuration and +// expectNoDuration can access it. The value isn't set here as we +// want a start time after the profiler has started +var startTime; + +function expectNoDuration(marker) { + Assert.equal( + typeof marker.startTime, + "number", + "startTime should be a number" + ); + Assert.greater( + marker.startTime, + startTime, + "startTime should be after the begining of the test" + ); + Assert.equal(typeof marker.endTime, "number", "endTime should be a number"); + Assert.equal(marker.endTime, 0, "endTime should be 0"); +} + +function expectDuration(marker) { + Assert.equal( + typeof marker.startTime, + "number", + "startTime should be a number" + ); + // Floats can cause rounding issues. We've seen up to a 4.17e-5 difference in + // intermittent failures, so we are permissive and accept up to 5e-5. 
+ Assert.less( + Math.abs(marker.startTime - startTime), + 5e-5, + "startTime should be the expected time" + ); + Assert.equal(typeof marker.endTime, "number", "endTime should be a number"); + Assert.greater( + marker.endTime, + startTime, + "endTime should be after startTime" + ); +} + +function expectNoData(marker) { + Assert.equal( + typeof marker.data, + "undefined", + "The data property should be undefined" + ); +} + +function expectText(marker) { + Assert.equal( + typeof marker.data, + "object", + "The data property should be an object" + ); + Assert.equal(marker.data.type, "Text", "Should be a Text marker"); + Assert.equal( + marker.data.name, + markerText, + "The payload should contain the expected text" + ); +} + +function expectNoStack(marker) { + Assert.ok(!marker.data || !marker.data.stack, "There should be no stack"); +} + +function expectStack(marker, thread) { + let stack = marker.data.stack; + Assert.ok(!!stack, "There should be a stack"); + + // Marker stacks are recorded as a profile of a thread with a single sample, + // get the stack id. + stack = stack.samples.data[0][stack.samples.schema.stack]; + + const stackPrefixCol = thread.stackTable.schema.prefix; + const stackFrameCol = thread.stackTable.schema.frame; + const frameLocationCol = thread.frameTable.schema.location; + + // Get the entire stack in an array for easier processing. 
+ let result = []; + while (stack != null) { + let stackEntry = thread.stackTable.data[stack]; + let frame = thread.frameTable.data[stackEntry[stackFrameCol]]; + result.push(thread.stringTable[frame[frameLocationCol]]); + stack = stackEntry[stackPrefixCol]; + } + + Assert.greaterOrEqual( + result.length, + 1, + "There should be at least one frame in the stack" + ); + + Assert.ok( + result.some(frame => frame.includes("testMarker")), + "the 'testMarker' function should be visible in the stack" + ); + + Assert.ok( + !result.some(frame => frame.includes("ChromeUtils.addProfilerMarker")), + "the 'ChromeUtils.addProfilerMarker' label frame should not be visible in the stack" + ); +} + +add_task(async () => { + startProfilerForMarkerTests(); + startTime = Cu.now(); + while (Cu.now() < startTime + 1) { + // Busy wait for 1ms to ensure the intentionally set start time of markers + // will be significantly different from the time at which the marker is + // recorded. + } + info("startTime used for markers with durations: " + startTime); + + /* Each call to testMarker will record a marker with a unique name. + * The testFunctions and testCases objects contain respectively test + * functions to verify that the marker found in the captured profile + * matches expectations, and a string that can be printed to describe + * in which way ChromeUtils.addProfilerMarker was called. 
*/ + let testFunctions = {}; + let testCases = {}; + let markerId = 0; + function testMarker(args, checks) { + let name = markerNamePrefix + markerId++; + ChromeUtils.addProfilerMarker(name, ...args); + testFunctions[name] = checks; + testCases[name] = `ChromeUtils.addProfilerMarker(${[name, ...args] + .toSource() + .slice(1, -1)})`; + } + + info("Record markers without options object."); + testMarker([], m => { + expectNoDuration(m); + expectNoData(m); + }); + testMarker([startTime], m => { + expectDuration(m); + expectNoData(m); + }); + testMarker([undefined, markerText], m => { + expectNoDuration(m); + expectText(m); + }); + testMarker([startTime, markerText], m => { + expectDuration(m); + expectText(m); + }); + + info("Record markers providing the duration as the startTime property."); + testMarker([{ startTime }], m => { + expectDuration(m); + expectNoData(m); + }); + testMarker([{}, markerText], m => { + expectNoDuration(m); + expectText(m); + }); + testMarker([{ startTime }, markerText], m => { + expectDuration(m); + expectText(m); + }); + + info("Record markers to test the captureStack property."); + const captureStack = true; + testMarker([], expectNoStack); + testMarker([startTime, markerText], expectNoStack); + testMarker([{ captureStack: false }], expectNoStack); + testMarker([{ captureStack }], expectStack); + testMarker([{ startTime, captureStack }], expectStack); + testMarker([{ captureStack }, markerText], expectStack); + testMarker([{ startTime, captureStack }, markerText], expectStack); + + info("Record markers to test the category property"); + function testCategory(args, expectedCategory) { + testMarker(args, marker => { + Assert.equal(marker.category, expectedCategory); + }); + } + testCategory([], "JavaScript"); + testCategory([{ category: "Test" }], "Test"); + testCategory([{ category: "Test" }, markerText], "Test"); + testCategory([{ category: "JavaScript" }], "JavaScript"); + testCategory([{ category: "Other" }], "Other"); + testCategory([{ 
category: "DOM" }], "DOM"); + testCategory([{ category: "does not exist" }], "Other"); + + info("Capture the profile"); + const profile = await stopNowAndGetProfile(); + const mainThread = profile.threads.find(({ name }) => name === "GeckoMain"); + const markers = getInflatedMarkerData(mainThread).filter(m => + m.name.startsWith(markerNamePrefix) + ); + Assert.equal( + markers.length, + Object.keys(testFunctions).length, + `Found ${markers.length} test markers in the captured profile` + ); + + for (let marker of markers) { + marker.category = profile.meta.categories[marker.category].name; + info(`${testCases[marker.name]} -> ${marker.toSource()}`); + + testFunctions[marker.name](marker, mainThread); + delete testFunctions[marker.name]; + } + + Assert.equal(0, Object.keys(testFunctions).length, "all markers were found"); +}); diff --git a/tools/profiler/tests/xpcshell/test_asm.js b/tools/profiler/tests/xpcshell/test_asm.js new file mode 100644 index 0000000000..ced36ce429 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_asm.js @@ -0,0 +1,76 @@ +// Check that asm.js code shows up on the stack. +add_task(async () => { + // This test assumes that it's starting on an empty profiler stack. + // (Note that the other profiler tests also assume the profiler + // isn't already started.) 
+ Assert.ok(!Services.profiler.IsActive()); + + let jsFuns = Cu.getJSTestingFunctions(); + if (!jsFuns.isAsmJSCompilationAvailable()) { + return; + } + + const ms = 10; + await Services.profiler.StartProfiler(10000, ms, ["js"]); + + let stack = null; + function ffi_function() { + var delayMS = 5; + while (1) { + let then = Date.now(); + do { + // do nothing + } while (Date.now() - then < delayMS); + + var thread0 = Services.profiler.getProfileData().threads[0]; + + if (delayMS > 30000) { + return; + } + + delayMS *= 2; + + if (!thread0.samples.data.length) { + continue; + } + + var lastSample = thread0.samples.data[thread0.samples.data.length - 1]; + stack = String(getInflatedStackLocations(thread0, lastSample)); + if (stack.includes("trampoline")) { + return; + } + } + } + + function asmjs_module(global, ffis) { + "use asm"; + var ffi = ffis.ffi; + function asmjs_function() { + ffi(); + } + return asmjs_function; + } + + Assert.ok(jsFuns.isAsmJSModule(asmjs_module)); + + var asmjs_function = asmjs_module(null, { ffi: ffi_function }); + Assert.ok(jsFuns.isAsmJSFunction(asmjs_function)); + + asmjs_function(); + + Assert.notEqual(stack, null); + + var i1 = stack.indexOf("entry trampoline"); + Assert.ok(i1 !== -1); + var i2 = stack.indexOf("asmjs_function"); + Assert.ok(i2 !== -1); + var i3 = stack.indexOf("exit trampoline"); + Assert.ok(i3 !== -1); + var i4 = stack.indexOf("ffi_function"); + Assert.ok(i4 !== -1); + Assert.ok(i1 < i2); + Assert.ok(i2 < i3); + Assert.ok(i3 < i4); + + await Services.profiler.StopProfiler(); +}); diff --git a/tools/profiler/tests/xpcshell/test_assertion_helper.js b/tools/profiler/tests/xpcshell/test_assertion_helper.js new file mode 100644 index 0000000000..baa4c34818 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_assertion_helper.js @@ -0,0 +1,162 @@ +add_task(function setup() { + // With the default reporter, an assertion doesn't throw if it fails, it + // merely report the result to the reporter and then go on. 
But in this test + // we want that a failure really throws, so that we can actually assert that + // it throws in case of failures! + // That's why we disable the default repoter here. + // I noticed that this line needs to be in an add_task (or possibly run_test) + // function. If put outside this will crash the test. + Assert.setReporter(null); +}); + +add_task(function test_objectContains() { + const fixture = { + foo: "foo", + bar: "bar", + }; + + Assert.objectContains(fixture, { foo: "foo" }, "Matches one property value"); + Assert.objectContains( + fixture, + { foo: "foo", bar: "bar" }, + "Matches both properties" + ); + Assert.objectContainsOnly( + fixture, + { foo: "foo", bar: "bar" }, + "Matches both properties" + ); + Assert.throws( + () => Assert.objectContainsOnly(fixture, { foo: "foo" }), + /AssertionError/, + "Fails if some properties are missing" + ); + Assert.throws( + () => Assert.objectContains(fixture, { foo: "bar" }), + /AssertionError/, + "Fails if the value for a present property is wrong" + ); + Assert.throws( + () => Assert.objectContains(fixture, { hello: "world" }), + /AssertionError/, + "Fails if an expected property is missing" + ); + Assert.throws( + () => Assert.objectContains(fixture, { foo: "foo", hello: "world" }), + /AssertionError/, + "Fails if some properties are present but others are missing" + ); +}); + +add_task(function test_objectContains_expectations() { + const fixture = { + foo: "foo", + bar: "bar", + num: 42, + nested: { + nestedFoo: "nestedFoo", + nestedBar: "nestedBar", + }, + }; + + Assert.objectContains( + fixture, + { + foo: Expect.stringMatches(/^fo/), + bar: Expect.stringContains("ar"), + num: Expect.number(), + nested: Expect.objectContainsOnly({ + nestedFoo: Expect.stringMatches(/[Ff]oo/), + nestedBar: Expect.stringMatches(/[Bb]ar/), + }), + }, + "Supports expectations" + ); + Assert.objectContainsOnly( + fixture, + { + foo: Expect.stringMatches(/^fo/), + bar: Expect.stringContains("ar"), + num: 
Expect.number(), + nested: Expect.objectContains({ + nestedFoo: Expect.stringMatches(/[Ff]oo/), + }), + }, + "Supports expectations" + ); + + Assert.objectContains(fixture, { + num: val => Assert.greater(val, 40), + }); + + // Failed expectations + Assert.throws( + () => + Assert.objectContains(fixture, { + foo: Expect.stringMatches(/bar/), + }), + /AssertionError/, + "Expect.stringMatches shouldn't match when the value is unexpected" + ); + Assert.throws( + () => + Assert.objectContains(fixture, { + foo: Expect.stringContains("bar"), + }), + /AssertionError/, + "Expect.stringContains shouldn't match when the value is unexpected" + ); + Assert.throws( + () => + Assert.objectContains(fixture, { + foo: Expect.number(), + }), + /AssertionError/, + "Expect.number shouldn't match when the value isn't a number" + ); + Assert.throws( + () => + Assert.objectContains(fixture, { + nested: Expect.objectContains({ + nestedFoo: "bar", + }), + }), + /AssertionError/, + "Expect.objectContains should throw when the value is unexpected" + ); + + Assert.throws( + () => + Assert.objectContains(fixture, { + num: val => Assert.less(val, 40), + }), + /AssertionError/, + "Expect.objectContains should throw when a function assertion fails" + ); +}); + +add_task(function test_type_expectations() { + const fixture = { + any: "foo", + string: "foo", + number: 42, + boolean: true, + bigint: 42n, + symbol: Symbol("foo"), + object: { foo: "foo" }, + function1() {}, + function2: () => {}, + }; + + Assert.objectContains(fixture, { + any: Expect.any(), + string: Expect.string(), + number: Expect.number(), + boolean: Expect.boolean(), + bigint: Expect.bigint(), + symbol: Expect.symbol(), + object: Expect.object(), + function1: Expect.function(), + function2: Expect.function(), + }); +}); diff --git a/tools/profiler/tests/xpcshell/test_enterjit_osr.js b/tools/profiler/tests/xpcshell/test_enterjit_osr.js new file mode 100644 index 0000000000..86845ddc76 --- /dev/null +++ 
b/tools/profiler/tests/xpcshell/test_enterjit_osr.js @@ -0,0 +1,52 @@ +// Check that the EnterJIT frame, added by the JIT trampoline and +// usable by a native unwinder to resume unwinding after encountering +// JIT code, is pushed as expected. +function run_test() { + // This test assumes that it's starting on an empty profiler stack. + // (Note that the other profiler tests also assume the profiler + // isn't already started.) + Assert.ok(!Services.profiler.IsActive()); + + const ms = 5; + Services.profiler.StartProfiler(10000, ms, ["js"]); + + function has_arbitrary_name_in_stack() { + // A frame for |arbitrary_name| has been pushed. Do a sequence of + // increasingly long spins until we get a sample. + var delayMS = 5; + while (1) { + info("loop: ms = " + delayMS); + const then = Date.now(); + do { + let n = 10000; + // eslint-disable-next-line no-empty + while (--n) {} // OSR happens here + // Spin in the hope of getting a sample. + } while (Date.now() - then < delayMS); + let profile = Services.profiler.getProfileData().threads[0]; + + // Go through all of the stacks, and search for this function name. + for (const sample of profile.samples.data) { + const stack = getInflatedStackLocations(profile, sample); + info(`The following stack was found: ${stack}`); + for (var i = 0; i < stack.length; i++) { + if (stack[i].match(/arbitrary_name/)) { + // This JS sample was correctly found. + return true; + } + } + } + + // Continue running this function with an increasingly long delay. + delayMS *= 2; + if (delayMS > 30000) { + return false; + } + } + } + Assert.ok( + has_arbitrary_name_in_stack(), + "A JS frame was found before the test timeout." 
+ ); + Services.profiler.StopProfiler(); +} diff --git a/tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js b/tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js new file mode 100644 index 0000000000..558c9b0c3b --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js @@ -0,0 +1,14 @@ +function run_test() { + Assert.ok(!Services.profiler.IsActive()); + + Services.profiler.StartProfiler(100, 10, ["js"]); + // The function is entered with the profiler enabled + (function () { + Services.profiler.StopProfiler(); + let n = 10000; + // eslint-disable-next-line no-empty + while (--n) {} // OSR happens here with the profiler disabled. + // An assertion will fail when this function returns, if the + // profiler stack was misbalanced. + })(); +} diff --git a/tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js b/tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js new file mode 100644 index 0000000000..313d939caf --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js @@ -0,0 +1,14 @@ +function run_test() { + Assert.ok(!Services.profiler.IsActive()); + + // The function is entered with the profiler disabled. + (function () { + Services.profiler.StartProfiler(100, 10, ["js"]); + let n = 10000; + // eslint-disable-next-line no-empty + while (--n) {} // OSR happens here with the profiler enabled. + // An assertion will fail when this function returns, if the + // profiler stack was misbalanced. + })(); + Services.profiler.StopProfiler(); +} diff --git a/tools/profiler/tests/xpcshell/test_feature_fileioall.js b/tools/profiler/tests/xpcshell/test_feature_fileioall.js new file mode 100644 index 0000000000..e5ac040b98 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_fileioall.js @@ -0,0 +1,159 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +add_task(async () => { + info( + "Test that off-main thread fileio is captured for a profiled thread, " + + "and that it will be sent to the main thread." + ); + const filename = "test_marker_fileio"; + const profile = await startProfilerAndTriggerFileIO({ + features: ["fileioall"], + threadsFilter: ["GeckoMain", "BgIOThreadPool"], + filename, + }); + + const threads = getThreads(profile); + const mainThread = threads.find(thread => thread.name === "GeckoMain"); + const mainThreadFileIO = getInflatedFileIOMarkers(mainThread, filename); + let backgroundThread; + let backgroundThreadFileIO; + for (const thread of threads) { + // Check for FileIO in any of the background threads. + if (thread.name.startsWith("BgIOThreadPool")) { + const markers = getInflatedFileIOMarkers(thread, filename); + if (markers.length) { + backgroundThread = thread; + backgroundThreadFileIO = markers; + break; + } + } + } + + info("Check all of the main thread FileIO markers."); + checkInflatedFileIOMarkers(mainThreadFileIO, filename); + for (const { data, name } of mainThreadFileIO) { + equal( + name, + "FileIO (non-main thread)", + "The markers from off main thread are labeled as such." + ); + equal( + data.threadId, + backgroundThread.tid, + "The main thread FileIO markers were all sent from the background thread." + ); + } + + info("Check all of the background thread FileIO markers."); + checkInflatedFileIOMarkers(backgroundThreadFileIO, filename); + for (const { data, name } of backgroundThreadFileIO) { + equal( + name, + "FileIO", + "The markers on the thread where they were generated just say FileIO" + ); + equal( + data.threadId, + undefined, + "The background thread FileIO correctly excludes the threadId." 
+ ); + } +}); + +add_task(async () => { + info( + "Test that off-main thread fileio is captured for a thread that is not profiled, " + + "and that it will be sent to the main thread." + ); + const filename = "test_marker_fileio"; + const profile = await startProfilerAndTriggerFileIO({ + features: ["fileioall"], + threadsFilter: ["GeckoMain"], + filename, + }); + + const threads = getThreads(profile); + const mainThread = threads.find(thread => thread.name === "GeckoMain"); + const mainThreadFileIO = getInflatedFileIOMarkers(mainThread, filename); + + info("Check all of the main thread FileIO markers."); + checkInflatedFileIOMarkers(mainThreadFileIO, filename); + for (const { data, name } of mainThreadFileIO) { + equal( + name, + "FileIO (non-profiled thread)", + "The markers from off main thread are labeled as such." + ); + equal(typeof data.threadId, "number", "A thread ID is captured."); + } +}); + +/** + * @typedef {Object} TestConfig + * @prop {Array} features The list of profiler features + * @prop {string[]} threadsFilter The list of threads to profile + * @prop {string} filename A filename to trigger a write operation + */ + +/** + * Start the profiler and get FileIO markers. + * @param {TestConfig} + * @returns {Profile} + */ +async function startProfilerAndTriggerFileIO({ + features, + threadsFilter, + filename, +}) { + const entries = 10000; + const interval = 10; + await Services.profiler.StartProfiler( + entries, + interval, + features, + threadsFilter + ); + + const path = PathUtils.join(PathUtils.tempDir, filename); + + info(`Using a temporary file to test FileIO: ${path}`); + + if (fileExists(path)) { + console.warn( + "This test is triggering FileIO by writing to a file. However, the test found an " + + "existing file at the location it was trying to write to. This could happen " + + "because a previous run of the test failed to clean up after itself. This test " + + " will now clean up that file before running the test again." 
+ ); + await removeFile(path); + } + + info("Write to the file, but do so using a background thread."); + + // IOUtils handles file operations using a background thread. + await IOUtils.write(path, new TextEncoder().encode("Test data.")); + const exists = await fileExists(path); + ok(exists, `Created temporary file at: ${path}`); + + info("Remove the file"); + await removeFile(path); + + return stopNowAndGetProfile(); +} + +async function fileExists(file) { + try { + let { type } = await IOUtils.stat(file); + return type === "regular"; + } catch (_error) { + return false; + } +} + +async function removeFile(file) { + await IOUtils.remove(file); + const exists = await fileExists(file); + ok(!exists, `Removed temporary file: ${file}`); +} diff --git a/tools/profiler/tests/xpcshell/test_feature_java.js b/tools/profiler/tests/xpcshell/test_feature_java.js new file mode 100644 index 0000000000..e2f6879c2b --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_java.js @@ -0,0 +1,31 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that Java capturing works as expected. + */ +add_task(async () => { + info("Test that Android Java sampler works as expected."); + const entries = 10000; + const interval = 1; + const threads = []; + const features = ["java"]; + + Services.profiler.StartProfiler(entries, interval, features, threads); + Assert.ok(Services.profiler.IsActive()); + + await captureAtLeastOneJsSample(); + + info( + "Stop the profiler and check that we have successfully captured a profile" + + " with the AndroidUI thread." 
+ ); + const profile = await stopNowAndGetProfile(); + Assert.notEqual(profile, null); + const androidUiThread = profile.threads.find( + thread => thread.name == "AndroidUI (JVM)" + ); + Assert.notEqual(androidUiThread, null); + Assert.ok(!Services.profiler.IsActive()); +}); diff --git a/tools/profiler/tests/xpcshell/test_feature_js.js b/tools/profiler/tests/xpcshell/test_feature_js.js new file mode 100644 index 0000000000..a5949e4a0c --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_js.js @@ -0,0 +1,63 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that JS capturing works as expected. + */ +add_task(async () => { + const entries = 10000; + const interval = 1; + const threads = []; + const features = ["js"]; + + await Services.profiler.StartProfiler(entries, interval, features, threads); + + // Call the following to get a nice stack in the profiler: + // functionA -> functionB -> functionC -> captureAtLeastOneJsSample + const sampleIndex = await functionA(); + + const profile = await stopNowAndGetProfile(); + + const [thread] = profile.threads; + const { samples } = thread; + + const inflatedStackFrames = getInflatedStackLocations( + thread, + samples.data[sampleIndex] + ); + + expectStackToContain( + inflatedStackFrames, + [ + "(root)", + "js::RunScript", + // The following regexes match a string similar to: + // + // "functionA (/gecko/obj/_tests/xpcshell/tools/profiler/tests/xpcshell/test_feature_js.js:47:0)" + // or + // "functionA (test_feature_js.js:47:0)" + // + // this matches the script location + // | match the line number + // | | match the column number + // v v v + /^functionA \(.*test_feature_js\.js:\d+:\d+\)$/, + /^functionB \(.*test_feature_js\.js:\d+:\d+\)$/, + /^functionC \(.*test_feature_js\.js:\d+:\d+\)$/, + ], + "The stack contains a few frame labels, as 
well as the JS functions that we called." + ); +}); + +function functionA() { + return functionB(); +} + +function functionB() { + return functionC(); +} + +async function functionC() { + return captureAtLeastOneJsSample(); +} diff --git a/tools/profiler/tests/xpcshell/test_feature_mainthreadio.js b/tools/profiler/tests/xpcshell/test_feature_mainthreadio.js new file mode 100644 index 0000000000..8ff5c9206d --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_mainthreadio.js @@ -0,0 +1,122 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const { FileUtils } = ChromeUtils.importESModule( + "resource://gre/modules/FileUtils.sys.mjs" +); + +/** + * Test that the IOInterposer is working correctly to capture main thread IO. + * + * This test should not run on release or beta, as the IOInterposer is wrapped in + * an ifdef. 
+ */ +add_task(async () => { + { + const filename = "profiler-mainthreadio-test-firstrun"; + const { markers, schema } = await runProfilerWithFileIO( + ["mainthreadio"], + filename + ); + info("Check the FileIO markers when using the mainthreadio feature"); + checkInflatedFileIOMarkers(markers, filename); + + checkSchema(schema, { + name: "FileIO", + display: ["marker-chart", "marker-table", "timeline-fileio"], + data: [ + { + key: "operation", + label: "Operation", + format: "string", + searchable: true, + }, + { key: "source", label: "Source", format: "string", searchable: true }, + { + key: "filename", + label: "Filename", + format: "file-path", + searchable: true, + }, + { + key: "threadId", + label: "Thread ID", + format: "string", + searchable: true, + }, + ], + }); + } + + { + const filename = "profiler-mainthreadio-test-no-instrumentation"; + const { markers } = await runProfilerWithFileIO([], filename); + equal( + markers.length, + 0, + "No FileIO markers are found when the mainthreadio feature is not turned on " + + "in the profiler." + ); + } + + { + const filename = "profiler-mainthreadio-test-secondrun"; + const { markers } = await runProfilerWithFileIO(["mainthreadio"], filename); + info("Check the FileIO markers when re-starting the mainthreadio feature"); + checkInflatedFileIOMarkers(markers, filename); + } +}); + +/** + * Start the profiler and get FileIO markers and schema. 
+ * + * @param {Array} features The list of profiler features + * @param {string} filename A filename to trigger a write operation + * @returns {{ + * markers: InflatedMarkers[]; + * schema: MarkerSchema; + * }} + */ +async function runProfilerWithFileIO(features, filename) { + const entries = 10000; + const interval = 10; + const threads = []; + await Services.profiler.StartProfiler(entries, interval, features, threads); + + info("Get the file"); + const file = FileUtils.getFile("TmpD", [filename]); + if (file.exists()) { + console.warn( + "This test is triggering FileIO by writing to a file. However, the test found an " + + "existing file at the location it was trying to write to. This could happen " + + "because a previous run of the test failed to clean up after itself. This test " + + " will now clean up that file before running the test again." + ); + file.remove(false); + } + + info( + "Generate file IO on the main thread using FileUtils.openSafeFileOutputStream." + ); + const outputStream = FileUtils.openSafeFileOutputStream(file); + + const data = "Test data."; + info("Write to the file"); + outputStream.write(data, data.length); + + info("Close the file"); + FileUtils.closeSafeFileOutputStream(outputStream); + + info("Remove the file"); + file.remove(false); + + const profile = await stopNowAndGetProfile(); + const mainThread = profile.threads.find(({ name }) => name === "GeckoMain"); + + const schema = getSchema(profile, "FileIO"); + + const markers = getInflatedFileIOMarkers(mainThread, filename); + + return { schema, markers }; +} diff --git a/tools/profiler/tests/xpcshell/test_feature_nativeallocations.js b/tools/profiler/tests/xpcshell/test_feature_nativeallocations.js new file mode 100644 index 0000000000..64398d7ef9 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_nativeallocations.js @@ -0,0 +1,158 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +add_task(async () => { + if (!Services.profiler.GetFeatures().includes("nativeallocations")) { + Assert.ok( + true, + "Native allocations are not supported by this build, " + + "skip run the rest of the test." + ); + return; + } + + Assert.ok( + !Services.profiler.IsActive(), + "The profiler is not currently active" + ); + + info( + "Test that the profiler can install memory hooks and collect native allocation " + + "information in the marker payloads." + ); + { + info("Start the profiler."); + await startProfiler({ + // Only instrument the main thread. + threads: ["GeckoMain"], + features: ["js", "nativeallocations"], + }); + + info( + "Do some JS work for a little bit. This will increase the amount of allocations " + + "that take place." + ); + doWork(); + + info("Get the profile data and analyze it."); + const profile = await waitSamplingAndStopAndGetProfile(); + + const { + allocationPayloads, + unmatchedAllocations, + logAllocationsAndDeallocations, + } = getAllocationInformation(profile); + + Assert.greater( + allocationPayloads.length, + 0, + "Native allocation payloads were recorded for the parent process' main thread when " + + "the Native Allocation feature was turned on." + ); + + if (unmatchedAllocations.length !== 0) { + info( + "There were unmatched allocations. Log all of the allocations and " + + "deallocations in order to aid debugging." + ); + logAllocationsAndDeallocations(); + ok( + false, + "Found a deallocation that did not have a matching allocation site. " + + "This could happen if balanced allocations is broken, or if the the " + + "buffer size of this test was too small, and some markers ended up " + + "rolling off." 
+ ); + } + + ok(true, "All deallocation sites had matching allocations."); + } + + info("Restart the profiler, to ensure that we get no more allocations."); + { + await startProfiler({ features: ["js"] }); + info("Do some work again."); + doWork(); + info("Wait for the periodic sampling."); + const profile = await waitSamplingAndStopAndGetProfile(); + const allocationPayloads = getPayloadsOfType( + profile.threads[0], + "Native allocation" + ); + + Assert.equal( + allocationPayloads.length, + 0, + "No native allocations were collected when the feature was disabled." + ); + } +}); + +function doWork() { + this.n = 0; + for (let i = 0; i < 1e5; i++) { + this.n += Math.random(); + } +} + +/** + * Extract the allocation payloads, and find the unmatched allocations. + */ +function getAllocationInformation(profile) { + // Get all of the allocation payloads. + const allocationPayloads = getPayloadsOfType( + profile.threads[0], + "Native allocation" + ); + + // Decide what is an allocation and deallocation. + const allocations = allocationPayloads.filter( + payload => ensureIsNumber(payload.size) >= 0 + ); + const deallocations = allocationPayloads.filter( + payload => ensureIsNumber(payload.size) < 0 + ); + + // Now determine the unmatched allocations by building a set + const allocationSites = new Set( + allocations.map(({ memoryAddress }) => memoryAddress) + ); + + const unmatchedAllocations = deallocations.filter( + ({ memoryAddress }) => !allocationSites.has(memoryAddress) + ); + + // Provide a helper to log out the allocations and deallocations on failure. 
+ function logAllocationsAndDeallocations() { + for (const { memoryAddress } of allocations) { + console.log("Allocations", formatHex(memoryAddress)); + allocationSites.add(memoryAddress); + } + + for (const { memoryAddress } of deallocations) { + console.log("Deallocations", formatHex(memoryAddress)); + } + + for (const { memoryAddress } of unmatchedAllocations) { + console.log("Deallocation with no allocation", formatHex(memoryAddress)); + } + } + + return { + allocationPayloads, + unmatchedAllocations, + logAllocationsAndDeallocations, + }; +} + +function ensureIsNumber(value) { + if (typeof value !== "number") { + throw new Error(`Expected a number: ${value}`); + } + return value; +} + +function formatHex(number) { + return `0x${number.toString(16)}`; +} diff --git a/tools/profiler/tests/xpcshell/test_feature_stackwalking.js b/tools/profiler/tests/xpcshell/test_feature_stackwalking.js new file mode 100644 index 0000000000..aa0bc86547 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_stackwalking.js @@ -0,0 +1,48 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Do a basic test to see if native frames are being collected for stackwalking. This + * test is fairly naive, as it does not attempt to check that these are valid symbols, + * only that some kind of stack walking is happening. It does this by making sure at + * least two native frames are collected. 
+ */ +add_task(async () => { + const entries = 10000; + const interval = 1; + const threads = []; + const features = ["stackwalk"]; + + await Services.profiler.StartProfiler(entries, interval, features, threads); + const sampleIndex = await captureAtLeastOneJsSample(); + + const profile = await stopNowAndGetProfile(); + const [thread] = profile.threads; + const { samples } = thread; + + const inflatedStackFrames = getInflatedStackLocations( + thread, + samples.data[sampleIndex] + ); + const nativeStack = /^0x[0-9a-f]+$/; + + expectStackToContain( + inflatedStackFrames, + [ + "(root)", + // There are probably more native stacks here. + nativeStack, + nativeStack, + // Since this is an xpcshell test we know that JavaScript will run: + "js::RunScript", + // There are probably more native stacks here. + nativeStack, + nativeStack, + ], + "Expected native stacks to be interleaved between some frame labels. There should" + + "be more than one native stack if stack walking is working correctly. There " + + "is no attempt here to determine if the memory addresses point to the correct " + + "symbols" + ); +}); diff --git a/tools/profiler/tests/xpcshell/test_get_features.js b/tools/profiler/tests/xpcshell/test_get_features.js new file mode 100644 index 0000000000..e9bf0047c8 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_get_features.js @@ -0,0 +1,8 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +function run_test() { + var profilerFeatures = Services.profiler.GetFeatures(); + Assert.ok(profilerFeatures != null); +} diff --git a/tools/profiler/tests/xpcshell/test_merged_stacks.js b/tools/profiler/tests/xpcshell/test_merged_stacks.js new file mode 100644 index 0000000000..7f851e8de9 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_merged_stacks.js @@ -0,0 +1,74 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we correctly merge the three stack types, JS, native, and frame labels. + */ +add_task(async () => { + const entries = 10000; + const interval = 1; + const threads = []; + const features = ["js", "stackwalk"]; + + await Services.profiler.StartProfiler(entries, interval, features, threads); + + // Call the following to get a nice stack in the profiler: + // functionA -> functionB -> functionC + const sampleIndex = await functionA(); + + const profile = await stopNowAndGetProfile(); + const [thread] = profile.threads; + const { samples } = thread; + + const inflatedStackFrames = getInflatedStackLocations( + thread, + samples.data[sampleIndex] + ); + + const nativeStack = /^0x[0-9a-f]+$/; + + expectStackToContain( + inflatedStackFrames, + [ + "(root)", + nativeStack, + nativeStack, + // There are more native stacks and frame labels here, but we know some execute + // and then the "js::RunScript" frame label runs. 
+ "js::RunScript", + nativeStack, + nativeStack, + // The following regexes match a string similar to: + // + // "functionA (/gecko/obj/_tests/xpcshell/tools/profiler/tests/xpcshell/test_merged_stacks.js:47:0)" + // or + // "functionA (test_merged_stacks.js:47:0)" + // + // this matches the script location + // | match the line number + // | | match the column number + // v v v + /^functionA \(.*test_merged_stacks\.js:\d+:\d+\)$/, + /^functionB \(.*test_merged_stacks\.js:\d+:\d+\)$/, + /^functionC \(.*test_merged_stacks\.js:\d+:\d+\)$/, + // After the JS frames, then there are a bunch of arbitrary native stack frames + // that run. + nativeStack, + nativeStack, + ], + "The stack contains a few frame labels, as well as the JS functions that we called." + ); +}); + +async function functionA() { + return functionB(); +} + +async function functionB() { + return functionC(); +} + +async function functionC() { + return captureAtLeastOneJsSample(); +} diff --git a/tools/profiler/tests/xpcshell/test_pause.js b/tools/profiler/tests/xpcshell/test_pause.js new file mode 100644 index 0000000000..0e621fb19f --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_pause.js @@ -0,0 +1,126 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +add_task(async () => { + Assert.ok(!Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + + let startPromise = Services.profiler.StartProfiler(1000, 10, []); + + // Default: Active and not paused. + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + await startPromise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + // Pause everything, implicitly pauses sampling. 
+ let pausePromise = Services.profiler.Pause(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + await pausePromise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + // While fully paused, pause and resume sampling only, no expected changes. + let pauseSamplingPromise = Services.profiler.PauseSampling(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + await pauseSamplingPromise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + let resumeSamplingPromise = Services.profiler.ResumeSampling(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + await resumeSamplingPromise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + // Resume everything. + let resumePromise = Services.profiler.Resume(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + await resumePromise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + // Pause sampling only. 
+ let pauseSampling2Promise = Services.profiler.PauseSampling(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + await pauseSampling2Promise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + // While sampling is paused, pause everything. + let pause2Promise = Services.profiler.Pause(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + await pause2Promise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + // Resume, but sampling is still paused separately. + let resume2promise = Services.profiler.Resume(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + await resume2promise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(Services.profiler.IsSamplingPaused()); + + // Resume sampling only. + let resumeSampling2Promise = Services.profiler.ResumeSampling(); + + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + await resumeSampling2Promise; + Assert.ok(Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + let stopPromise = Services.profiler.StopProfiler(); + Assert.ok(!Services.profiler.IsActive()); + // Stopping is not pausing. 
+ Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); + + await stopPromise; + Assert.ok(!Services.profiler.IsActive()); + Assert.ok(!Services.profiler.IsPaused()); + Assert.ok(!Services.profiler.IsSamplingPaused()); +}); diff --git a/tools/profiler/tests/xpcshell/test_responsiveness.js b/tools/profiler/tests/xpcshell/test_responsiveness.js new file mode 100644 index 0000000000..5f57173090 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_responsiveness.js @@ -0,0 +1,50 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that we can measure non-zero event delays + */ + +add_task(async () => { + const entries = 10000; + const interval = 1; + const threads = []; + const features = []; + + await Services.profiler.StartProfiler(entries, interval, features, threads); + + await functionA(); + + const profile = await stopNowAndGetProfile(); + const [thread] = profile.threads; + const { samples } = thread; + const message = "eventDelay > 0 not found."; + let SAMPLE_STACK_SLOT = thread.samples.schema.eventDelay; + + for (let i = 0; i < samples.data.length; i++) { + if (samples.data[i][SAMPLE_STACK_SLOT] > 0) { + Assert.ok(true, message); + return; + } + } + Assert.ok(false, message); +}); + +function doSyncWork(milliseconds) { + const start = Date.now(); + while (true) { + this.n = 0; + for (let i = 0; i < 1e5; i++) { + this.n += Math.random(); + } + if (Date.now() - start > milliseconds) { + return; + } + } +} + +async function functionA() { + doSyncWork(100); + return captureAtLeastOneJsSample(); +} diff --git a/tools/profiler/tests/xpcshell/test_run.js b/tools/profiler/tests/xpcshell/test_run.js new file mode 100644 index 0000000000..0e30edfd4e --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_run.js @@ -0,0 +1,37 @@ +/* This Source Code Form is 
subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +function run_test() { + Assert.ok(!Services.profiler.IsActive()); + + Services.profiler.StartProfiler(1000, 10, []); + + Assert.ok(Services.profiler.IsActive()); + + do_test_pending(); + + do_timeout(1000, function wait() { + // Check text profile format + var profileStr = Services.profiler.GetProfile(); + Assert.ok(profileStr.length > 10); + + // check json profile format + var profileObj = Services.profiler.getProfileData(); + Assert.notEqual(profileObj, null); + Assert.notEqual(profileObj.threads, null); + // We capture memory counters by default only when jemalloc is turned + // on (and it isn't for ASAN), so unless we can conditionalize for ASAN + // here we can't check that we're capturing memory counter data. + Assert.notEqual(profileObj.counters, null); + Assert.notEqual(profileObj.memory, null); + Assert.ok(profileObj.threads.length >= 1); + Assert.notEqual(profileObj.threads[0].samples, null); + // NOTE: The number of samples will be empty since we + // don't have any labels in the xpcshell code + + Services.profiler.StopProfiler(); + Assert.ok(!Services.profiler.IsActive()); + do_test_finished(); + }); +} diff --git a/tools/profiler/tests/xpcshell/test_shared_library.js b/tools/profiler/tests/xpcshell/test_shared_library.js new file mode 100644 index 0000000000..e211ca642b --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_shared_library.js @@ -0,0 +1,21 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +function run_test() { + var libs = Services.profiler.sharedLibraries; + + Assert.equal(typeof libs, "object"); + Assert.ok(Array.isArray(libs)); + Assert.equal(typeof libs, "object"); + Assert.ok(libs.length >= 1); + Assert.equal(typeof libs[0], "object"); + Assert.equal(typeof libs[0].name, "string"); + Assert.equal(typeof libs[0].path, "string"); + Assert.equal(typeof libs[0].debugName, "string"); + Assert.equal(typeof libs[0].debugPath, "string"); + Assert.equal(typeof libs[0].arch, "string"); + Assert.equal(typeof libs[0].start, "number"); + Assert.equal(typeof libs[0].end, "number"); + Assert.ok(libs[0].start <= libs[0].end); +} diff --git a/tools/profiler/tests/xpcshell/test_start.js b/tools/profiler/tests/xpcshell/test_start.js new file mode 100644 index 0000000000..c9ae135eb8 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_start.js @@ -0,0 +1,21 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +add_task(async () => { + Assert.ok(!Services.profiler.IsActive()); + + let startPromise = Services.profiler.StartProfiler(10, 100, []); + + Assert.ok(Services.profiler.IsActive()); + + await startPromise; + Assert.ok(Services.profiler.IsActive()); + + let stopPromise = Services.profiler.StopProfiler(); + + Assert.ok(!Services.profiler.IsActive()); + + await stopPromise; + Assert.ok(!Services.profiler.IsActive()); +}); diff --git a/tools/profiler/tests/xpcshell/xpcshell.ini b/tools/profiler/tests/xpcshell/xpcshell.ini new file mode 100644 index 0000000000..a7c461b4ac --- /dev/null +++ b/tools/profiler/tests/xpcshell/xpcshell.ini @@ -0,0 +1,72 @@ +[DEFAULT] +head = head.js +support-files = + ../shared-head.js + +[test_active_configuration.js] +skip-if = tsan # Intermittent timeouts, bug 1781449 +[test_addProfilerMarker.js] +[test_start.js] +skip-if = true +[test_get_features.js] +[test_responsiveness.js] +skip-if = tsan # Times out on TSan, bug 1612707 +[test_shared_library.js] +[test_run.js] +skip-if = true +[test_pause.js] +[test_enterjit_osr.js] +[test_enterjit_osr_disabling.js] +skip-if = !debug +[test_enterjit_osr_enabling.js] +skip-if = !debug +[test_asm.js] +[test_feature_mainthreadio.js] +skip-if = + release_or_beta + os == "win" && socketprocess_networking +[test_feature_fileioall.js] +skip-if = + release_or_beta + +# The sanitizer checks appear to overwrite our own memory hooks in xpcshell tests, +# and no allocation markers are gathered. Skip this test in that configuration. +[test_feature_nativeallocations.js] +skip-if = + os == "android" && verify # bug 1757528 + asan + tsan + socketprocess_networking + +# Native stackwalking is somewhat unreliable depending on the platform. +# +# We don't have frame pointers on macOS release and beta, so stack walking does not +# work. See Bug 1571216 for more details. +# +# Linux can be very unreliable when native stackwalking through JavaScript code. +# See Bug 1434402 for more details. 
+# +# For sanitizer builds, there were many intermittents, and we're not getting much +# additional coverage there, so it's better to be a bit more reliable. +[test_feature_stackwalking.js] +skip-if = + os == "mac" && release_or_beta + os == "linux" && release_or_beta && !debug + asan + tsan + +[test_feature_js.js] +skip-if = tsan # Times out on TSan, bug 1612707 + +# See the comment on test_feature_stackwalking.js +[test_merged_stacks.js] +skip-if = + os == "mac" && release_or_beta + os == "linux" && release_or_beta && !debug + asan + tsan + +[test_assertion_helper.js] +[test_feature_java.js] +skip-if = + os != "android" -- cgit v1.2.3