summaryrefslogtreecommitdiffstats
path: root/mozglue/baseprofiler
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mozglue/baseprofiler/build/generate_profiling_categories.py335
-rw-r--r--mozglue/baseprofiler/build/profiling_categories.yaml303
-rw-r--r--mozglue/baseprofiler/core/BaseAndGeckoProfilerDetail.cpp92
-rw-r--r--mozglue/baseprofiler/core/EHABIStackWalk.cpp592
-rw-r--r--mozglue/baseprofiler/core/EHABIStackWalk.h30
-rw-r--r--mozglue/baseprofiler/core/PageInformation.cpp49
-rw-r--r--mozglue/baseprofiler/core/PageInformation.h77
-rw-r--r--mozglue/baseprofiler/core/PlatformMacros.h130
-rw-r--r--mozglue/baseprofiler/core/ProfileBuffer.cpp217
-rw-r--r--mozglue/baseprofiler/core/ProfileBuffer.h211
-rw-r--r--mozglue/baseprofiler/core/ProfileBufferEntry.cpp1341
-rw-r--r--mozglue/baseprofiler/core/ProfileBufferEntry.h364
-rw-r--r--mozglue/baseprofiler/core/ProfileJSONWriter.cpp101
-rw-r--r--mozglue/baseprofiler/core/ProfiledThreadData.cpp191
-rw-r--r--mozglue/baseprofiler/core/ProfiledThreadData.h120
-rw-r--r--mozglue/baseprofiler/core/ProfilerBacktrace.cpp125
-rw-r--r--mozglue/baseprofiler/core/ProfilerBacktrace.h162
-rw-r--r--mozglue/baseprofiler/core/ProfilerMarkers.cpp415
-rw-r--r--mozglue/baseprofiler/core/ProfilerUtils.cpp162
-rw-r--r--mozglue/baseprofiler/core/ProfilingCategory.cpp71
-rw-r--r--mozglue/baseprofiler/core/ProfilingStack.cpp52
-rw-r--r--mozglue/baseprofiler/core/RegisteredThread.cpp42
-rw-r--r--mozglue/baseprofiler/core/RegisteredThread.h164
-rw-r--r--mozglue/baseprofiler/core/ThreadInfo.h62
-rw-r--r--mozglue/baseprofiler/core/VTuneProfiler.cpp92
-rw-r--r--mozglue/baseprofiler/core/VTuneProfiler.h84
-rw-r--r--mozglue/baseprofiler/core/platform-linux-android.cpp513
-rw-r--r--mozglue/baseprofiler/core/platform-macos.cpp221
-rw-r--r--mozglue/baseprofiler/core/platform-win32.cpp297
-rw-r--r--mozglue/baseprofiler/core/platform.cpp3830
-rw-r--r--mozglue/baseprofiler/core/platform.h149
-rw-r--r--mozglue/baseprofiler/core/shared-libraries-linux.cc855
-rw-r--r--mozglue/baseprofiler/core/shared-libraries-macos.cc206
-rw-r--r--mozglue/baseprofiler/core/shared-libraries-win32.cc198
-rw-r--r--mozglue/baseprofiler/core/vtune/ittnotify.h4127
-rw-r--r--mozglue/baseprofiler/lul/AutoObjectMapper.cpp80
-rw-r--r--mozglue/baseprofiler/lul/AutoObjectMapper.h64
-rw-r--r--mozglue/baseprofiler/lul/LulCommon.cpp102
-rw-r--r--mozglue/baseprofiler/lul/LulCommonExt.h509
-rw-r--r--mozglue/baseprofiler/lul/LulDwarf.cpp2252
-rw-r--r--mozglue/baseprofiler/lul/LulDwarfExt.h1289
-rw-r--r--mozglue/baseprofiler/lul/LulDwarfInt.h193
-rw-r--r--mozglue/baseprofiler/lul/LulDwarfSummariser.cpp553
-rw-r--r--mozglue/baseprofiler/lul/LulDwarfSummariser.h64
-rw-r--r--mozglue/baseprofiler/lul/LulElf.cpp871
-rw-r--r--mozglue/baseprofiler/lul/LulElfExt.h69
-rw-r--r--mozglue/baseprofiler/lul/LulElfInt.h218
-rw-r--r--mozglue/baseprofiler/lul/LulMain.cpp1958
-rw-r--r--mozglue/baseprofiler/lul/LulMain.h378
-rw-r--r--mozglue/baseprofiler/lul/LulMainInt.h420
-rw-r--r--mozglue/baseprofiler/lul/platform-linux-lul.cpp76
-rw-r--r--mozglue/baseprofiler/lul/platform-linux-lul.h21
-rw-r--r--mozglue/baseprofiler/moz.build133
-rw-r--r--mozglue/baseprofiler/public/BaseAndGeckoProfilerDetail.h67
-rw-r--r--mozglue/baseprofiler/public/BaseProfileJSONWriter.h600
-rw-r--r--mozglue/baseprofiler/public/BaseProfiler.h506
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerCounts.h281
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerDetail.h285
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerLabels.h178
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h125
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerMarkers.h255
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h741
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h968
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerRAIIMacro.h15
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h177
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerState.h412
-rw-r--r--mozglue/baseprofiler/public/BaseProfilerUtils.h227
-rw-r--r--mozglue/baseprofiler/public/BaseProfilingCategory.h68
-rw-r--r--mozglue/baseprofiler/public/BaseProfilingStack.h518
-rw-r--r--mozglue/baseprofiler/public/FailureLatch.h217
-rw-r--r--mozglue/baseprofiler/public/ModuloBuffer.h618
-rw-r--r--mozglue/baseprofiler/public/PowerOfTwo.h322
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferChunk.h547
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferChunkManager.h134
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h172
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h444
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h203
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferEntryKinds.h104
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h1184
-rw-r--r--mozglue/baseprofiler/public/ProfileBufferIndex.h97
-rw-r--r--mozglue/baseprofiler/public/ProfileChunkedBuffer.h1560
-rw-r--r--mozglue/baseprofiler/public/ProfileChunkedBufferDetail.h401
-rw-r--r--mozglue/baseprofiler/public/ProfilerBufferSize.h60
-rw-r--r--mozglue/baseprofiler/public/ProgressLogger.h500
-rw-r--r--mozglue/baseprofiler/public/ProportionValue.h235
-rw-r--r--mozglue/baseprofiler/public/leb128iterator.h207
86 files changed, 37658 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/build/generate_profiling_categories.py b/mozglue/baseprofiler/build/generate_profiling_categories.py
new file mode 100644
index 0000000000..fb632d1fcf
--- /dev/null
+++ b/mozglue/baseprofiler/build/generate_profiling_categories.py
@@ -0,0 +1,335 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This script generates ProfilingCategoryList.h and profiling_categories.rs
+# files from profiling_categories.yaml.
+
+import yaml
+
+# Skeleton of the generated C++ header. generate_header() fills in
+# `{includeguard}` and `{contents}` with str.format().
+CPP_HEADER_TEMPLATE = """\
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef {includeguard}
+#define {includeguard}
+
+/* This file is generated by generate_profiling_categories.py from
+ profiling_categories.yaml. DO NOT EDIT! */
+
+// Profiler sub-categories are applied to each sampled stack to describe the
+// type of workload that the CPU is busy with. Only one sub-category can be
+// assigned so be mindful that these are non-overlapping. The active category is
+// set by pushing a label to the profiling stack, or by the unwinder in cases
+// such as JITs. A profile sample in arbitrary C++/Rust will typically be
+// categorized based on the top of the label stack.
+//
+// The list of available color names for categories is:
+// transparent
+// blue
+// green
+// grey
+// lightblue
+// magenta
+// orange
+// purple
+// yellow
+
+// clang-format off
+
+{contents}
+
+// clang-format on
+
+#endif // {includeguard}
+"""
+
+# First line of the generated macro; generate_macro_header() appends the
+# per-category lines after it. Not passed through str.format(), so the
+# backslash continuation is written once.
+CPP_MACRO_DEFINITION = """\
+#define MOZ_PROFILING_CATEGORY_LIST(BEGIN_CATEGORY, SUBCATEGORY, END_CATEGORY) \\
+"""
+
+# Skeleton of the generated Rust file. generate_rust_file() fills in
+# `{contents}`.
+RUST_TEMPLATE = """\
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file is generated by generate_profiling_categories.py from
+ profiling_categories.yaml. DO NOT EDIT! */
+
+{contents}\
+"""
+
+# The Rust templates below go through str.format(), so literal braces in the
+# generated Rust are written doubled (`{{` / `}}`).
+
+# One Rust enum declaration; used by RustEnum.to_rust_string().
+RUST_ENUM_TEMPLATE = """\
+#[repr(u32)]
+#[derive(Debug, Copy, Clone)]
+pub enum {name} {{
+{fields}
+}}
+"""
+
+# `to_cpp_enum_value` impl for an enum; used by RustEnum.append_conversion_impl().
+RUST_CONVERSION_IMPL_TEMPLATE = """\
+impl {name} {{
+ pub fn to_cpp_enum_value(&self) -> u32 {{
+{content}
+ }}
+}}
+"""
+
+# `Default` impl for an enum; used by RustEnum.append_default_impl().
+RUST_DEFAULT_IMPL_TEMPLATE = """\
+impl Default for {name} {{
+ fn default() -> Self {{
+{content}
+ }}
+}}
+"""
+
+# `match *self` expression wrapping the given match arms; used by
+# generate_rust_enums() for the ProfilingCategoryPair conversion.
+RUST_MATCH_SELF = """\
+ match *self {{
+{fields}
+ }}
+"""
+
+
+def generate_header(c_out, includeguard, contents):
+ """Write CPP_HEADER_TEMPLATE, filled in with the given include guard name
+ and body text, to `c_out` (any object with a write() method)."""
+ c_out.write(
+ CPP_HEADER_TEMPLATE.format(includeguard=includeguard, contents=contents)
+ )
+
+
+def generate_rust_file(c_out, contents):
+ """Write RUST_TEMPLATE, filled in with `contents`, to `c_out` (any object
+ with a write() method)."""
+ c_out.write(RUST_TEMPLATE.format(contents=contents))
+
+
+def load_yaml(yaml_path):
+ file_handler = open(yaml_path)
+ return yaml.safe_load(file_handler)
+
+
+def generate_category_macro(name, label, color, subcategories):
+ """Return the macro text for one category: a BEGIN_CATEGORY line, one
+ SUBCATEGORY line per entry of `subcategories`, then END_CATEGORY.
+
+ Every line except the final END_CATEGORY ends with a backslash
+ continuation. Each subcategory must be a mapping with string "name" and
+ "label" entries (checked with assertions).
+ """
+ contents = ' BEGIN_CATEGORY({name}, "{label}", "{color}") \\\n'.format(
+ name=name, label=label, color=color
+ )
+
+ subcategory_items = []
+
+ for subcategory in subcategories:
+ subcat_name = subcategory["name"]
+ assert isinstance(subcat_name, str)
+ subcat_label = subcategory["label"]
+ assert isinstance(subcat_label, str)
+
+ # The parent category name is repeated as SUBCATEGORY's first argument.
+ subcategory_items.append(
+ ' SUBCATEGORY({parent_cat}, {name}, "{label}") \\\n'.format(
+ parent_cat=name, name=subcat_name, label=subcat_label
+ )
+ )
+
+ contents += "".join(subcategory_items)
+ # No trailing continuation here: the caller decides what follows.
+ contents += " END_CATEGORY"
+
+ return contents
+
+
+def generate_macro_header(c_out, yaml_path):
+ """Generate ProfilingCategoryList.h from profiling_categories.yaml.
+ The generated file has a macro to generate the profiling category enums.
+
+ `yaml_path` is read with load_yaml(); the header is written to `c_out`.
+ Each category must have string "name", "label" and "color" entries and a
+ non-empty "subcategories" list (checked with assertions).
+ """
+
+ data = load_yaml(yaml_path)
+
+ # Stores the macro definition of each category.
+ category_items = []
+
+ for category in data:
+ name = category["name"]
+ assert isinstance(name, str)
+ label = category["label"]
+ assert isinstance(label, str)
+ color = category["color"]
+ assert isinstance(color, str)
+ subcategories = category.get("subcategories", None)
+ assert (
+ isinstance(subcategories, list) and len(subcategories) > 0
+ ), "At least one subcategory expected as default in {}.".format(name)
+
+ category_items.append(
+ generate_category_macro(name, label, color, subcategories)
+ )
+
+ contents = CPP_MACRO_DEFINITION
+ # Categories are joined with a backslash-newline so that the whole list
+ # remains a single macro definition; the last one has no continuation.
+ contents += " \\\n".join(category_items)
+
+ generate_header(c_out, "baseprofiler_ProfilingCategoryList_h", contents)
+
+
+class RustEnum:
+ """Class that keeps the rust enum fields and impls.
+ This is used for generating the Rust ProfilingCategoryPair and ProfilingCategory
+ enums as well as ProfilingCategoryPair's sub category enums.
+ For example, this can either generate an enum with discriminant fields for sub
+ category enums and ProfilingCategory:
+ ```
+ #[repr(u32)]
+ #[derive(Debug, Copy, Clone)]
+ pub enum Graphics {
+ LayerBuilding = 0,
+ ...
+ }
+ ```
+ or can generate an enum with optional tuple values for ProfilingCategoryPair
+ to explicitly mention their sub categories:
+ ```
+ #[repr(u32)]
+ #[derive(Debug, Copy, Clone)]
+ pub enum ProfilingCategoryPair {
+ Network(Option<Network>),
+ ...
+ }
+ ```
+
+ And in addition to enums, it will generate impls for each enum. See one
+ example below:
+ ```
+ impl Default for Network {
+ fn default() -> Self {
+ Network::Other
+ }
+ }
+ ```
+ """
+
+ def __init__(self, name):
+ # Name of the Rust enum.
+ self.name = name
+ # Fields of the Rust enum. This list contains elements of
+ # (field_name, field_string) tuple for convenience.
+ self.fields = []
+ # Impls of the Rust enum. Each element is a string.
+ self.impls = []
+ # Default category of the Rust enum. Main enums won't have it, but all
+ # sub category enums must have one. This is being checked later.
+ self.default_category = None
+
+ def append_optional_tuple_field(self, field_name):
+ """Append the enum fields list with an optional tuple field."""
+ # The payload type has the same name as the variant itself.
+ field = (field_name, " {name}(Option<{name}>),".format(name=field_name))
+ self.fields.append(field)
+
+ def append_discriminant_field(self, field_name, field_value):
+ """Append the enum fields list with a discriminant field."""
+ field = (
+ field_name,
+ " {name} = {value},".format(name=field_name, value=field_value),
+ )
+ self.fields.append(field)
+
+ def append_default_impl(self, default_category):
+ """Append the enum impls list with a default implementation."""
+ # Remembered so that callers can check that a default was provided.
+ self.default_category = default_category
+
+ self.impls.append(
+ RUST_DEFAULT_IMPL_TEMPLATE.format(
+ name=self.name,
+ content=" {category}::{subcategory}".format(
+ category=self.name, subcategory=self.default_category
+ ),
+ )
+ )
+
+ def append_conversion_impl(self, content):
+ """Append the enum impls list with a conversion implementation for cpp values."""
+ self.impls.append(
+ RUST_CONVERSION_IMPL_TEMPLATE.format(name=self.name, content=content)
+ )
+
+ def to_rust_string(self):
+ """Serialize the enum with its impls as a string"""
+ # Each field is a (field_name, field_string) tuple; only the string
+ # part is emitted.
+ joined_fields = "\n".join(map(lambda field: field[1], self.fields))
+ result = RUST_ENUM_TEMPLATE.format(name=self.name, fields=joined_fields)
+ result += "\n"
+ result += "\n".join(self.impls)
+ return result
+
+
+def generate_rust_enums(c_out, yaml_path):
+ """Generate profiling_categories.rs from profiling_categories.yaml.
+ The generated file has a profiling category enums and their impls.
+ """
+
+ data = load_yaml(yaml_path)
+
+ # Each category has its own enum for keeping its subcategories. We are
+ # keeping all of them here.
+ enums = []
+ # Parent enums for prifiling category and profiling category pair. They will
+ # be appended to the end of the `enums`.
+ profiling_category_pair_enum = RustEnum("ProfilingCategoryPair")
+ profiling_category_enum = RustEnum("ProfilingCategory")
+ profiling_category_pair_value = 0
+
+ for cat_index, category in enumerate(data):
+ cat_name = category["name"]
+ assert isinstance(cat_name, str)
+ cat_label = category["label"]
+ assert isinstance(cat_label, str)
+ # This will be used as our main enum field and sub category enum.
+ cat_label = "".join(filter(str.isalnum, cat_label))
+ cat_subcategories = category.get("subcategories", None)
+ assert (
+ isinstance(cat_subcategories, list) and len(cat_subcategories) > 0
+ ), "At least one subcategory expected as default in {}.".format(cat_name)
+
+ # Create a new enum for this sub category and append it to the enums list.
+ category_enum = RustEnum(cat_label)
+ enums.append(category_enum)
+
+ for subcategory in cat_subcategories:
+ subcat_name = subcategory["name"]
+ assert isinstance(subcat_name, str)
+ subcat_label = subcategory["label"]
+ assert isinstance(subcat_label, str)
+ friendly_subcat_name = None
+
+ if cat_name == subcat_name:
+ # This is the default sub-category. It should use the label as name.
+ friendly_subcat_name = subcat_label
+ category_enum.append_default_impl(subcat_label)
+ else:
+ # This is a non-default sub-category.
+ underscore_pos = subcat_name.find("_")
+ friendly_subcat_name = subcat_name[underscore_pos + 1 :]
+
+ friendly_subcat_name = "".join(filter(str.isalnum, friendly_subcat_name))
+ category_enum.append_discriminant_field(
+ friendly_subcat_name, profiling_category_pair_value
+ )
+ profiling_category_pair_value += 1
+
+ assert (
+ category_enum.default_category is not None
+ ), "There must be a default subcategory with the same name."
+
+ # Append the main enums.
+ profiling_category_pair_enum.append_optional_tuple_field(cat_label)
+ profiling_category_enum.append_discriminant_field(cat_label, cat_index)
+
+ # Add the main enums impls for conversion into cpp values.
+ profiling_category_pair_impl_fields = "\n".join(
+ " {enum_name}::{field_name}(val) => val.unwrap_or_default() as u32,".format(
+ enum_name="ProfilingCategoryPair", field_name=field
+ )
+ for field, _ in profiling_category_pair_enum.fields
+ )
+ profiling_category_pair_enum.append_conversion_impl(
+ RUST_MATCH_SELF.format(fields=profiling_category_pair_impl_fields)
+ )
+ profiling_category_enum.append_conversion_impl(" *self as u32")
+
+ # After adding all the sub category enums, we can add the main enums to the list.
+ enums.append(profiling_category_pair_enum)
+ enums.append(profiling_category_enum)
+
+ # Print all the enums and their impls.
+ contents = "\n".join(map(lambda enum: enum.to_rust_string(), enums))
+ generate_rust_file(c_out, contents)
diff --git a/mozglue/baseprofiler/build/profiling_categories.yaml b/mozglue/baseprofiler/build/profiling_categories.yaml
new file mode 100644
index 0000000000..602974db68
--- /dev/null
+++ b/mozglue/baseprofiler/build/profiling_categories.yaml
@@ -0,0 +1,303 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Profiling categories
+# ====================
+# This file defines all profiling categories with their sub-categories. It is
+# parsed by generate_profiling_categories.py at build time to create
+# ProfilingCategoryList.h and profiling_categories.rs files.
+#
+# Profiling sub-categories are applied to each sampled stack to describe the
+# type of workload that the CPU is busy with. Only one sub-category can be
+# assigned so be mindful that these are non-overlapping. The active category is
+# set by pushing a label to the profiling stack, or by the unwinder in cases
+# such as JITs. A profile sample in arbitrary C++/Rust will typically be
+# categorized based on the top of the label stack.
+#
+# Each category consists of a name and a set of attributes that are described below:
+#
+# name [required]
+# ====
+# Name of the profiling category. This will be used in the C++ enum fields (not
+# by Rust).
+#
+# label [required]
+# =====
+# Label of the profiling category. This is a more human-readable string for the
+# category. The label is displayed in the Firefox Profiler front-end. It is also
+# used as a Rust enum field (with non-alphanumeric characters removed), because
+# that is more idiomatic for Rust enums than the name fields (which are snake
+# case in all caps, which is not idiomatic for Rust enum fields).
+#
+# color [required]
+# =====
+# Color that this category will show up as in the Firefox Profiler front-end.
+# The list of available color names for categories is:
+# - transparent
+# - blue
+# - green
+# - grey
+# - lightblue
+# - magenta
+# - orange
+# - purple
+# - yellow
+#
+# subcategories [required]
+# =============
+# A list of sub-categories that belong to this category.
+# There must be at least one sub-category for each category, and one of them
+# must have the same name as the category to indicate the default
+# sub-category. Each sub-category must have name and label attributes.
+#
+# name attribute should either be the same as the category (for default
+# sub-category) or should start with parent category name + underscore
+# (e.g. JS_Parsing).
+#
+# label attribute has the same purpose as parent category label attribute.
+#
+# For example:
+# - name: JS
+# subcategories:
+# - name: JS
+# label: Other
+# - name: JS_Parsing
+# label: Parsing
+#
+# Note that the first sub-category has the same name with the category. This is
+# the default sub-category. Also note the other sub-categories starting with the
+# category name + underscore.
+#
+
+- name: IDLE
+ label: Idle
+ color: transparent
+ subcategories:
+ - name: IDLE
+ label: Other
+
+- name: OTHER
+ label: Other
+ color: grey
+ subcategories:
+ - name: OTHER
+ label: Other
+ - name: OTHER_PreferenceRead
+ label: Preference Read
+ - name: OTHER_Profiling
+ label: Profiling
+
+- name: TEST
+ label: Test
+ color: darkgray
+ subcategories:
+ - name: TEST
+ label: Test
+
+- name: LAYOUT
+ label: Layout
+ color: purple
+ subcategories:
+ - name: LAYOUT
+ label: Other
+ - name: LAYOUT_FrameConstruction
+ label: Frame construction
+ - name: LAYOUT_Reflow
+ label: Reflow
+ - name: LAYOUT_CSSParsing
+ label: CSS parsing
+ - name: LAYOUT_SelectorQuery
+ label: Selector query
+ - name: LAYOUT_StyleComputation
+ label: Style computation
+ - name: LAYOUT_Destroy
+ label: Layout cleanup
+ - name: LAYOUT_Printing
+ label: Printing
+
+- name: JS
+ label: JavaScript
+ color: yellow
+ subcategories:
+ - name: JS
+ label: Other
+ - name: JS_Parsing
+ label: Parsing
+ - name: JS_BaselineCompilation
+ label: JIT Compile (baseline)
+ - name: JS_IonCompilation
+ label: JIT Compile (ion)
+ - name: JS_Interpreter
+ label: Interpreter
+ - name: JS_BaselineInterpret
+ label: JIT (baseline-interpreter)
+ - name: JS_Baseline
+ label: JIT (baseline)
+ - name: JS_IonMonkey
+ label: JIT (ion)
+ - name: JS_Builtin
+ label: Builtin API
+ - name: JS_Wasm
+ label: Wasm
+
+- name: GCCC
+ label: GC / CC
+ color: orange
+ subcategories:
+ - name: GCCC
+ label: Other
+ - name: GCCC_MinorGC
+ label: Minor GC
+ - name: GCCC_MajorGC
+ label: Major GC (Other)
+ - name: GCCC_MajorGC_Mark
+ label: Major GC (Mark)
+ - name: GCCC_MajorGC_Sweep
+ label: Major GC (Sweep)
+ - name: GCCC_MajorGC_Compact
+ label: Major GC (Compact)
+ - name: GCCC_UnmarkGray
+ label: Unmark Gray
+ - name: GCCC_Barrier
+ label: Barrier
+ - name: GCCC_FreeSnowWhite
+ label: CC (Free Snow White)
+ - name: GCCC_BuildGraph
+ label: CC (Build Graph)
+ - name: GCCC_ScanRoots
+ label: CC (Scan Roots)
+ - name: GCCC_CollectWhite
+ label: CC (Collect White)
+ - name: GCCC_Finalize
+ label: CC (Finalize)
+
+- name: NETWORK
+ label: Network
+ color: lightblue
+ subcategories:
+ - name: NETWORK
+ label: Other
+
+- name: GRAPHICS
+ label: Graphics
+ color: green
+ subcategories:
+ - name: GRAPHICS
+ label: Other
+ - name: GRAPHICS_DisplayListBuilding
+ label: DisplayList building
+ - name: GRAPHICS_DisplayListMerging
+ label: DisplayList merging
+ - name: GRAPHICS_LayerBuilding
+ label: Layer building
+ - name: GRAPHICS_TileAllocation
+ label: Tile allocation
+ - name: GRAPHICS_WRDisplayList
+ label: WebRender display list
+ - name: GRAPHICS_Rasterization
+ label: Rasterization
+ - name: GRAPHICS_FlushingAsyncPaints
+ label: Flushing async paints
+ - name: GRAPHICS_ImageDecoding
+ label: Image decoding
+
+- name: DOM
+ label: DOM
+ color: blue
+ subcategories:
+ - name: DOM
+ label: Other
+
+- name: JAVA_ANDROID
+ label: Android
+ color: yellow
+ subcategories:
+ - name: JAVA_ANDROID
+ label: Other
+
+- name: JAVA_ANDROIDX
+ label: AndroidX
+ color: orange
+ subcategories:
+ - name: JAVA_ANDROIDX
+ label: Other
+
+- name: JAVA_LANGUAGE
+ label: Java
+ color: blue
+ subcategories:
+ - name: JAVA_LANGUAGE
+ label: Other
+
+- name: JAVA_MOZILLA
+ label: Mozilla
+ color: green
+ subcategories:
+ - name: JAVA_MOZILLA
+ label: Other
+
+- name: JAVA_KOTLIN
+ label: Kotlin
+ color: purple
+ subcategories:
+ - name: JAVA_KOTLIN
+ label: Other
+
+- name: JAVA_BLOCKED
+ label: Blocked
+ color: lightblue
+ subcategories:
+ - name: JAVA_BLOCKED
+ label: Other
+
+- name: IPC
+ label: IPC
+ color: lightgreen
+ subcategories:
+ - name: IPC
+ label: Other
+
+- name: MEDIA
+ label: Media
+ color: orange
+ subcategories:
+ - name: MEDIA
+ label: Other
+ - name: MEDIA_CUBEB
+ label: Cubeb
+ - name: MEDIA_PLAYBACK
+ label: Playback
+ - name: MEDIA_RT
+ label: Real-time rendering
+
+# We don't name this category ACCESSIBILITY
+# because it's already defined as a macro.
+- name: A11Y
+ label: Accessibility
+ color: brown
+ subcategories:
+ - name: A11Y
+ label: Other
+
+- name: PROFILER
+ label: Profiler
+ color: lightred
+ subcategories:
+ - name: PROFILER
+ label: Other
+
+- name: TIMER
+ label: Timer
+ color: grey
+ subcategories:
+ - name: TIMER
+ label: Other
+
+- name: REMOTE_PROTOCOL
+ label: Remote-Protocol
+ color: grey
+ subcategories:
+ - name: REMOTE_PROTOCOL
+ label: Other
diff --git a/mozglue/baseprofiler/core/BaseAndGeckoProfilerDetail.cpp b/mozglue/baseprofiler/core/BaseAndGeckoProfilerDetail.cpp
new file mode 100644
index 0000000000..213f25cf16
--- /dev/null
+++ b/mozglue/baseprofiler/core/BaseAndGeckoProfilerDetail.cpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseAndGeckoProfilerDetail.h"
+
+#include <limits>
+#include <string_view>
+
+namespace mozilla::profiler::detail {
+
+constexpr std::string_view scPidPrefix = "pid:";
+
+// Convert a C string to a BaseProfilerProcessId. Return unspecified
+// BaseProfilerProcessId if the string is not exactly a valid pid.
+//
+// A valid pid is either "0" or a sequence of decimal digits without leading
+// zeroes whose value fits in BaseProfilerProcessId::NumberType. Null, empty,
+// non-numeric and out-of-range strings all give the unspecified id.
+static baseprofiler::BaseProfilerProcessId StringToPid(const char* aString) {
+  if (!aString || aString[0] == '\0') {
+    // Null or empty.
+    return baseprofiler::BaseProfilerProcessId{};
+  }
+
+  if (aString[0] == '0') {
+    if (aString[1] != '\0') {
+      // Don't accept leading zeroes.
+      return baseprofiler::BaseProfilerProcessId{};
+    }
+    return baseprofiler::BaseProfilerProcessId::FromNumber(0);
+  }
+
+  using PidNumber = baseprofiler::BaseProfilerProcessId::NumberType;
+  static_assert(!std::numeric_limits<PidNumber>::is_signed,
+                "The following relies on unsigned arithmetic");
+  constexpr PidNumber kMaxPid = std::numeric_limits<PidNumber>::max();
+
+  PidNumber pid = 0;
+  for (; *aString != '\0'; ++aString) {
+    const char c = *aString;
+    if (c < '0' || c > '9') {
+      // Only accept decimal digits.
+      return baseprofiler::BaseProfilerProcessId{};
+    }
+    const PidNumber digit = PidNumber(c - '0');
+    // Reject the digit *before* multiplying: `pid * 10 + digit` fits iff
+    // `pid <= (kMaxPid - digit) / 10`. Comparing the wrapped result against
+    // the previous value is not enough, because multiplying by 10 can wrap
+    // around to a number that is still larger than `pid`.
+    if (pid > (kMaxPid - digit) / 10u) {
+      // Would overflow.
+      return baseprofiler::BaseProfilerProcessId{};
+    }
+    pid = pid * 10u + digit;
+  }
+  return baseprofiler::BaseProfilerProcessId::FromNumber(pid);
+}
+
+// Return true if `aFilter` is "pid:" immediately followed by a string that
+// StringToPid() converts to `aPid`.
+// NOTE(review): `aFilter` is handed to strncmp without a null check, so
+// callers presumably guarantee a non-null string -- confirm.
+[[nodiscard]] MFBT_API bool FilterHasPid(
+ const char* aFilter, baseprofiler::BaseProfilerProcessId aPid) {
+ if (strncmp(aFilter, scPidPrefix.data(), scPidPrefix.length()) != 0) {
+ // The filter is not starting with "pid:".
+ return false;
+ }
+
+ // Parse what follows the prefix and compare it with the requested pid.
+ return StringToPid(aFilter + scPidPrefix.length()) == aPid;
+}
+
+// Return true if `aFilters` consists solely of "pid:..." entries and none of
+// them designates `aPid`; return false for an empty list, for a list with at
+// least one non-"pid:" entry, or when `aPid` is listed.
+[[nodiscard]] MFBT_API bool FiltersExcludePid(
+    Span<const char* const> aFilters,
+    baseprofiler::BaseProfilerProcessId aPid) {
+  // An empty filter list excludes nothing.
+  if (aFilters.empty()) {
+    return false;
+  }
+
+  // The pid is excluded only when *every* filter is a "pid:..." entry and
+  // none of them names aPid. Either kind of counter-example gives `false`,
+  // so one pass that stops at the first counter-example is enough.
+  for (const char* const entry : aFilters) {
+    const bool isPidFilter =
+        strncmp(entry, scPidPrefix.data(), scPidPrefix.length()) == 0;
+    if (!isPidFilter) {
+      // At least one filter is *not* a "pid:...", our pid is not excluded.
+      return false;
+    }
+    if (StringToPid(entry + scPidPrefix.length()) == aPid) {
+      // Our pid is present, so it's not excluded.
+      return false;
+    }
+  }
+
+  // Our pid was not in a list of only pids, so it's excluded.
+  return true;
+}
+
+} // namespace mozilla::profiler::detail
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.cpp b/mozglue/baseprofiler/core/EHABIStackWalk.cpp
new file mode 100644
index 0000000000..0c2c855c9b
--- /dev/null
+++ b/mozglue/baseprofiler/core/EHABIStackWalk.cpp
@@ -0,0 +1,592 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI, as described in:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
+ *
+ * This handles only the ARM-defined "personality routines" (chapter
+ * 9), and doesn't track the values of FP registers, because profiling
+ * needs only the chain of PC/SP values.
+ *
+ * Because the exception handling info may not be accurate for all
+ * possible places where an async signal could occur (e.g., in a
+ * prologue or epilogue), this bounds-checks all stack accesses.
+ *
+ * This file uses "struct" for structures in the exception tables and
+ * "class" otherwise. We should avoid violating the C++11
+ * standard-layout rules in the former.
+ */
+
+#include "BaseProfiler.h"
+
+#include "EHABIStackWalk.h"
+
+#include "BaseProfilerSharedLibraries.h"
+#include "platform.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <stdint.h>
+#include <vector>
+#include <string>
+
+#ifndef PT_ARM_EXIDX
+# define PT_ARM_EXIDX 0x70000001
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// One 32-bit word of the exception tables, interpreted as a flag in bit 31
+// plus a 31-bit payload. The payload can be read as an unsigned value or as
+// a signed offset relative to the address of the word itself. Instances are
+// never constructed or copied; they are only viewed in place.
+struct PRel31 {
+  uint32_t mBits;
+  // True if bit 31 is set.
+  bool topBit() const { return mBits & 0x80000000; }
+  // The low 31 bits, as an unsigned value.
+  uint32_t value() const { return mBits & 0x7fffffff; }
+  // The low 31 bits, sign-extended from bit 30. The left shift is done on
+  // the unsigned word (discarding bit 31) and only then converted to a
+  // signed type: left-shifting a signed value whose result does not fit is
+  // undefined behaviour before C++20.
+  int32_t offset() const { return static_cast<int32_t>(mBits << 1) >> 1; }
+  // Address designated by the offset, relative to this word's own address.
+  const void* compute() const {
+    return reinterpret_cast<const char*>(this) + offset();
+  }
+
+ private:
+  PRel31(const PRel31& copied) = delete;
+  PRel31() = delete;
+};
+
+// One entry of an exception index table: two consecutive PRel31 words.
+// Like PRel31, it is only viewed in place (construction and copying are
+// deleted).
+struct EHEntry {
+ // Presumably the self-relative start address of the code this entry
+ // describes -- the lookup code is outside this part of the file.
+ PRel31 startPC;
+ // Unwind information: EHInterp treats the raw value 1 as "can't unwind",
+ // a set top bit as inline unwind data, and anything else as a
+ // self-relative pointer to the unwind data.
+ PRel31 exidx;
+
+ private:
+ EHEntry(const EHEntry& copied) = delete;
+ EHEntry() = delete;
+};
+
+// Register state carried from frame to frame while unwinding: the 16 core
+// registers, indexable with operator[] (see R_SP, R_LR and R_PC).
+class EHState {
+ // Note that any core register can be used as a "frame pointer" to
+ // influence the unwinding process, so this must track all of them.
+ uint32_t mRegs[16];
+
+ public:
+ // Step this state across one frame using the given table entry; the
+ // caller stops walking when this returns false.
+ bool unwind(const EHEntry* aEntry, const void* stackBase);
+ // Unchecked access to register `i`.
+ uint32_t& operator[](int i) { return mRegs[i]; }
+ const uint32_t& operator[](int i) const { return mRegs[i]; }
+ // Initial state taken from a saved machine context.
+ explicit EHState(const mcontext_t&);
+};
+
+// Indices into EHState's register array for the stack pointer, link register
+// and program counter.
+enum { R_SP = 13, R_LR = 14, R_PC = 15 };
+
+// The exception index entries found in one ELF image, together with the
+// image's name, base address and PC bounds.
+class EHTable {
+ uint32_t mStartPC;
+ uint32_t mEndPC;
+ uint32_t mBaseAddress;
+ // The entries form the half-open range [mEntriesBegin, mEntriesEnd).
+ const EHEntry* mEntriesBegin;
+ const EHEntry* mEntriesEnd;
+ std::string mName;
+
+ public:
+ // Build the table from the image at `aELF`; `aName` is kept for name().
+ EHTable(const void* aELF, size_t aSize, const std::string& aName);
+ // Find the entry for `aPC`; callers treat a null result as "not found".
+ const EHEntry* lookup(uint32_t aPC) const;
+ // A table is valid only if it has at least one entry.
+ bool isValid() const { return mEntriesEnd != mEntriesBegin; }
+ const std::string& name() const { return mName; }
+ uint32_t startPC() const { return mStartPC; }
+ uint32_t endPC() const { return mEndPC; }
+ uint32_t baseAddress() const { return mBaseAddress; }
+};
+
+// A set of EHTables that can be searched by PC. One instance is shared
+// process-wide through the atomic pointer sCurrent.
+class EHAddrSpace {
+ // NOTE(review): presumably the tables' start PCs, kept alongside mTables
+ // to speed up lookup() -- confirm against the definitions, which are
+ // outside this part of the file.
+ std::vector<uint32_t> mStarts;
+ std::vector<EHTable> mTables;
+ static Atomic<const EHAddrSpace*> sCurrent;
+
+ public:
+ explicit EHAddrSpace(const std::vector<EHTable>& aTables);
+ // Find the table for `aPC`; callers treat a null result as "not found".
+ const EHTable* lookup(uint32_t aPC) const;
+ // Called from EHABIStackWalkInit().
+ static void Update();
+ // May return null; EHABIStackWalk() then records only the first frame.
+ static const EHAddrSpace* Get();
+};
+
+// Set up the unwinder by calling EHAddrSpace::Update().
+void EHABIStackWalkInit() { EHAddrSpace::Update(); }
+
+// Walk the stack starting from the register state in `aContext`, storing the
+// PC and SP of each frame into `aPCs` and `aSPs`. At most `aNumFrames` frames
+// are recorded; the number actually recorded is returned. The frame described
+// by `aContext` itself is always recorded first (if `aNumFrames` is non-zero),
+// even when no unwind information is available.
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, const size_t aNumFrames) {
+  const EHAddrSpace* const addrSpace = EHAddrSpace::Get();
+  EHState regs(aContext);
+
+  size_t frames = 0;
+  while (frames < aNumFrames) {
+    const uint32_t framePC = regs[R_PC];
+    const uint32_t frameSP = regs[R_SP];
+    aPCs[frames] = reinterpret_cast<void*>(framePC);
+    aSPs[frames] = reinterpret_cast<void*>(frameSP);
+    ++frames;
+
+    // Without unwind tables only the frame above can be reported.
+    if (!addrSpace) {
+      break;
+    }
+    // TODO: cache these lookups. Binary-searching libxul is
+    // expensive (possibly more expensive than doing the actual
+    // unwind), and even a small cache should help.
+    const EHTable* const table = addrSpace->lookup(framePC);
+    const EHEntry* const entry = table ? table->lookup(framePC) : nullptr;
+    // Stop when this PC has no unwind entry or the step itself fails.
+    if (!entry || !regs.unwind(entry, stackBase)) {
+      break;
+    }
+  }
+
+  return frames;
+}
+
+class EHInterp {
+ public:
+ // Note that stackLimit is exclusive and stackBase is inclusive
+ // (i.e, stackLimit < SP <= stackBase), following the convention
+ // set by the AAPCS spec.
+ EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit,
+ uint32_t aStackBase)
+ : mState(aState),
+ mStackLimit(aStackLimit),
+ mStackBase(aStackBase),
+ mNextWord(0),
+ mWordsLeft(0),
+ mFailed(false) {
+ const PRel31& exidx = aEntry->exidx;
+ uint32_t firstWord;
+
+ if (exidx.mBits == 1) { // EXIDX_CANTUNWIND
+ mFailed = true;
+ return;
+ }
+ if (exidx.topBit()) {
+ firstWord = exidx.mBits;
+ } else {
+ mNextWord = reinterpret_cast<const uint32_t*>(exidx.compute());
+ firstWord = *mNextWord++;
+ }
+
+ switch (firstWord >> 24) {
+ case 0x80: // short
+ mWord = firstWord << 8;
+ mBytesLeft = 3;
+ break;
+ case 0x81:
+ case 0x82: // long; catch descriptor size ignored
+ mWord = firstWord << 16;
+ mBytesLeft = 2;
+ mWordsLeft = (firstWord >> 16) & 0xff;
+ break;
+ default:
+ // unknown personality
+ mFailed = true;
+ }
+ }
+
+ bool unwind();
+
+ private:
+ // TODO: GCC has been observed not CSEing repeated reads of
+ // mState[R_SP] with writes to mFailed between them, suggesting that
+ // it hasn't determined that they can't alias and is thus missing
+ // optimization opportunities. So, we may want to flatten EHState
+ // into this class; this may also make the code simpler.
+ EHState& mState;
+ uint32_t mStackLimit;
+ uint32_t mStackBase;
+ const uint32_t* mNextWord;
+ uint32_t mWord;
+ uint8_t mWordsLeft;
+ uint8_t mBytesLeft;
+ bool mFailed;
+
+ enum {
+ I_ADDSP = 0x00, // 0sxxxxxx (subtract if s)
+ M_ADDSP = 0x80,
+ I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set)
+ M_POPMASK = 0xf0,
+ I_MOVSP = 0x90, // 1001nnnn
+ M_MOVSP = 0xf0,
+ I_POPN = 0xa0, // 1010lnnn
+ M_POPN = 0xf0,
+ I_FINISH = 0xb0, // 10110000
+ I_POPLO = 0xb1, // 10110001 0000iiii (if any i set)
+ I_ADDSPBIG = 0xb2, // 10110010 uleb128
+ I_POPFDX = 0xb3, // 10110011 sssscccc
+ I_POPFDX8 = 0xb8, // 10111nnn
+ M_POPFDX8 = 0xf8,
+ // "Intel Wireless MMX" extensions omitted.
+ I_POPFDD = 0xc8, // 1100100h sssscccc
+ M_POPFDD = 0xfe,
+ I_POPFDD8 = 0xd0, // 11010nnn
+ M_POPFDD8 = 0xf8
+ };
+
+ uint8_t next() {
+ if (mBytesLeft == 0) {
+ if (mWordsLeft == 0) {
+ return I_FINISH;
+ }
+ mWordsLeft--;
+ mWord = *mNextWord++;
+ mBytesLeft = 4;
+ }
+ mBytesLeft--;
+ mWord = (mWord << 8) | (mWord >> 24); // rotate
+ return mWord;
+ }
+
+ uint32_t& vSP() { return mState[R_SP]; }
+ uint32_t* ptrSP() { return reinterpret_cast<uint32_t*>(vSP()); }
+
+ void checkStackBase() {
+ if (vSP() > mStackBase) mFailed = true;
+ }
+ void checkStackLimit() {
+ if (vSP() <= mStackLimit) mFailed = true;
+ }
+ void checkStackAlign() {
+ if ((vSP() & 3) != 0) mFailed = true;
+ }
+ void checkStack() {
+ checkStackBase();
+ checkStackLimit();
+ checkStackAlign();
+ }
+
+ void popRange(uint8_t first, uint8_t last, uint16_t mask) {
+ bool hasSP = false;
+ uint32_t tmpSP;
+ if (mask == 0) mFailed = true;
+ for (uint8_t r = first; r <= last; ++r) {
+ if (mask & 1) {
+ if (r == R_SP) {
+ hasSP = true;
+ tmpSP = *ptrSP();
+ } else
+ mState[r] = *ptrSP();
+ vSP() += 4;
+ checkStackBase();
+ if (mFailed) return;
+ }
+ mask >>= 1;
+ }
+ if (hasSP) {
+ vSP() = tmpSP;
+ checkStack();
+ }
+ }
+};
+
+bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) {
+ // The unwinding program cannot set SP to less than the initial value.
+ uint32_t stackLimit = mRegs[R_SP] - 4;
+ uint32_t stackBase = reinterpret_cast<uint32_t>(stackBasePtr);
+ EHInterp interp(*this, aEntry, stackLimit, stackBase);
+ return interp.unwind();
+}
+
+bool EHInterp::unwind() {
+ mState[R_PC] = 0;
+ checkStack();
+ while (!mFailed) {
+ uint8_t insn = next();
+#if DEBUG_EHABI_UNWIND
+ LOG("unwind insn = %02x", (unsigned)insn);
+#endif
+ // Try to put the common cases first.
+
+ // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
+ // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
+ if ((insn & M_ADDSP) == I_ADDSP) {
+ uint32_t offset = ((insn & 0x3f) << 2) + 4;
+ if (insn & 0x40) {
+ vSP() -= offset;
+ checkStackLimit();
+ } else {
+ vSP() += offset;
+ checkStackBase();
+ }
+ continue;
+ }
+
+ // 10100nnn: Pop r4-r[4+nnn]
+ // 10101nnn: Pop r4-r[4+nnn], r14
+ if ((insn & M_POPN) == I_POPN) {
+ uint8_t n = (insn & 0x07) + 1;
+ bool lr = insn & 0x08;
+ uint32_t* ptr = ptrSP();
+ vSP() += (n + (lr ? 1 : 0)) * 4;
+ checkStackBase();
+ for (uint8_t r = 4; r < 4 + n; ++r) mState[r] = *ptr++;
+ if (lr) mState[R_LR] = *ptr++;
+ continue;
+ }
+
+ // 10110000: Finish
+ if (insn == I_FINISH) {
+ if (mState[R_PC] == 0) {
+ mState[R_PC] = mState[R_LR];
+ // Non-standard change (bug 916106): Prevent the caller from
+ // re-using LR. Since the caller is by definition not a leaf
+ // routine, it will have to restore LR from somewhere to
+ // return to its own caller, so we can safely zero it here.
+ // This makes a difference only if an error in unwinding
+ // (e.g., caused by starting from within a prologue/epilogue)
+ // causes us to load a pointer to a leaf routine as LR; if we
+ // don't do something, we'll go into an infinite loop of
+ // "returning" to that same function.
+ mState[R_LR] = 0;
+ }
+ return true;
+ }
+
+ // 1001nnnn: Set vsp = r[nnnn]
+ if ((insn & M_MOVSP) == I_MOVSP) {
+ vSP() = mState[insn & 0x0f];
+ checkStack();
+ continue;
+ }
+
+ // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
+ // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
+ if ((insn & M_POPFDD) == I_POPFDD) {
+ uint8_t n = (next() & 0x0f) + 1;
+ // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
+ // As the space is currently unused, we don't try to check.
+ vSP() += 8 * n;
+ checkStackBase();
+ continue;
+ }
+
+ // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
+ if ((insn & M_POPFDD8) == I_POPFDD8) {
+ uint8_t n = (insn & 0x07) + 1;
+ vSP() += 8 * n;
+ checkStackBase();
+ continue;
+ }
+
+ // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+ if (insn == I_ADDSPBIG) {
+ uint32_t acc = 0;
+ uint8_t shift = 0;
+ uint8_t byte;
+ do {
+ if (shift >= 32) return false;
+ byte = next();
+ acc |= (byte & 0x7f) << shift;
+ shift += 7;
+ } while (byte & 0x80);
+ uint32_t offset = 0x204 + (acc << 2);
+ // The calculations above could have overflowed.
+ // But the one we care about is this:
+ if (vSP() + offset < vSP()) mFailed = true;
+ vSP() += offset;
+ // ...so that this is the only other check needed:
+ checkStackBase();
+ continue;
+ }
+
+ // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
+ if ((insn & M_POPMASK) == I_POPMASK) {
+ popRange(4, 15, ((insn & 0x0f) << 8) | next());
+ continue;
+ }
+
+ // 10110001 0000iiii (i not all 0): Pop under mask {r3-r0}
+ if (insn == I_POPLO) {
+ popRange(0, 3, next() & 0x0f);
+ continue;
+ }
+
+ // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
+ if (insn == I_POPFDX) {
+ uint8_t n = (next() & 0x0f) + 1;
+ vSP() += 8 * n + 4;
+ checkStackBase();
+ continue;
+ }
+
+ // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
+ if ((insn & M_POPFDX8) == I_POPFDX8) {
+ uint8_t n = (insn & 0x07) + 1;
+ vSP() += 8 * n + 4;
+ checkStackBase();
+ continue;
+ }
+
+ // unhandled instruction
+#ifdef DEBUG_EHABI_UNWIND
+ LOG("Unhandled EHABI instruction 0x%02x", insn);
+#endif
+ mFailed = true;
+ }
+ return false;
+}
+
+bool operator<(const EHTable& lhs, const EHTable& rhs) {
+ return lhs.startPC() < rhs.startPC();
+}
+
+// Async signal unsafe.
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
+ : mTables(aTables) {
+ std::sort(mTables.begin(), mTables.end());
+ DebugOnly<uint32_t> lastEnd = 0;
+ for (std::vector<EHTable>::iterator i = mTables.begin(); i != mTables.end();
+ ++i) {
+ MOZ_ASSERT(i->startPC() >= lastEnd);
+ mStarts.push_back(i->startPC());
+ lastEnd = i->endPC();
+ }
+}
+
+const EHTable* EHAddrSpace::lookup(uint32_t aPC) const {
+ ptrdiff_t i = (std::upper_bound(mStarts.begin(), mStarts.end(), aPC) -
+ mStarts.begin()) -
+ 1;
+
+ if (i < 0 || aPC >= mTables[i].endPC()) return 0;
+ return &mTables[i];
+}
+
+const EHEntry* EHTable::lookup(uint32_t aPC) const {
+ MOZ_ASSERT(aPC >= mStartPC);
+ if (aPC >= mEndPC) return nullptr;
+
+ const EHEntry* begin = mEntriesBegin;
+ const EHEntry* end = mEntriesEnd;
+ MOZ_ASSERT(begin < end);
+ if (aPC < reinterpret_cast<uint32_t>(begin->startPC.compute()))
+ return nullptr;
+
+ while (end - begin > 1) {
+#ifdef EHABI_UNWIND_MORE_ASSERTS
+ if ((end - 1)->startPC.compute() < begin->startPC.compute()) {
+ MOZ_CRASH("unsorted exidx");
+ }
+#endif
+ const EHEntry* mid = begin + (end - begin) / 2;
+ if (aPC < reinterpret_cast<uint32_t>(mid->startPC.compute()))
+ end = mid;
+ else
+ begin = mid;
+ }
+ return begin;
+}
+
+#if MOZ_LITTLE_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2LSB;
+#elif MOZ_BIG_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2MSB;
+#else
+# error "No endian?"
+#endif
+
+// Async signal unsafe: std::vector::reserve, std::string copy ctor.
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName)
+ : mStartPC(~0), // largest uint32_t
+ mEndPC(0),
+ mEntriesBegin(nullptr),
+ mEntriesEnd(nullptr),
+ mName(aName) {
+ const uint32_t fileHeaderAddr = reinterpret_cast<uint32_t>(aELF);
+
+ if (aSize < sizeof(Elf32_Ehdr)) return;
+
+ const Elf32_Ehdr& file = *(reinterpret_cast<Elf32_Ehdr*>(fileHeaderAddr));
+ if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
+ file.e_ident[EI_CLASS] != ELFCLASS32 ||
+ file.e_ident[EI_DATA] != hostEndian ||
+ file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM ||
+ file.e_version != EV_CURRENT)
+ // e_flags?
+ return;
+
+ MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize);
+ const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0;
+ for (unsigned i = 0; i < file.e_phnum; ++i) {
+ const Elf32_Phdr& phdr = *(reinterpret_cast<Elf32_Phdr*>(
+ fileHeaderAddr + file.e_phoff + i * file.e_phentsize));
+ if (phdr.p_type == PT_ARM_EXIDX) {
+ exidxHdr = &phdr;
+ } else if (phdr.p_type == PT_LOAD) {
+ if (phdr.p_offset == 0) {
+ zeroHdr = &phdr;
+ }
+ if (phdr.p_flags & PF_X) {
+ mStartPC = std::min(mStartPC, phdr.p_vaddr);
+ mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
+ }
+ }
+ }
+ if (!exidxHdr) return;
+ if (!zeroHdr) return;
+ mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr;
+ mStartPC += mBaseAddress;
+ mEndPC += mBaseAddress;
+ mEntriesBegin =
+ reinterpret_cast<const EHEntry*>(mBaseAddress + exidxHdr->p_vaddr);
+ mEntriesEnd = reinterpret_cast<const EHEntry*>(
+ mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz);
+}
+
+Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);
+
+// Async signal safe; can fail if Update() hasn't returned yet.
+const EHAddrSpace* EHAddrSpace::Get() { return sCurrent; }
+
+// Collect unwinding information from loaded objects. Calls after the
+// first have no effect. Async signal unsafe.
+void EHAddrSpace::Update() {
+ const EHAddrSpace* space = sCurrent;
+ if (space) return;
+
+ SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
+ std::vector<EHTable> tables;
+
+ for (size_t i = 0; i < info.GetSize(); ++i) {
+ const SharedLibrary& lib = info.GetEntry(i);
+ // FIXME: This isn't correct if the start address isn't p_offset 0, because
+ // the start address will not point at the file header. But this is worked
+ // around by magic number checks in the EHTable constructor.
+ EHTable tab(reinterpret_cast<const void*>(lib.GetStart()),
+ lib.GetEnd() - lib.GetStart(), lib.GetDebugPath());
+ if (tab.isValid()) tables.push_back(tab);
+ }
+ space = new EHAddrSpace(tables);
+
+ if (!sCurrent.compareExchange(nullptr, space)) {
+ delete space;
+ space = sCurrent;
+ }
+}
+
+EHState::EHState(const mcontext_t& context) {
+#ifdef linux
+ mRegs[0] = context.arm_r0;
+ mRegs[1] = context.arm_r1;
+ mRegs[2] = context.arm_r2;
+ mRegs[3] = context.arm_r3;
+ mRegs[4] = context.arm_r4;
+ mRegs[5] = context.arm_r5;
+ mRegs[6] = context.arm_r6;
+ mRegs[7] = context.arm_r7;
+ mRegs[8] = context.arm_r8;
+ mRegs[9] = context.arm_r9;
+ mRegs[10] = context.arm_r10;
+ mRegs[11] = context.arm_fp;
+ mRegs[12] = context.arm_ip;
+ mRegs[13] = context.arm_sp;
+ mRegs[14] = context.arm_lr;
+ mRegs[15] = context.arm_pc;
+#else
+# error "Unhandled OS for ARM EHABI unwinding"
+#endif
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.h b/mozglue/baseprofiler/core/EHABIStackWalk.h
new file mode 100644
index 0000000000..d5f4edc0d7
--- /dev/null
+++ b/mozglue/baseprofiler/core/EHABIStackWalk.h
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI; see the comment at the top of
+ * the .cpp file for details.
+ */
+
+#ifndef mozilla_EHABIStackWalk_h__
+#define mozilla_EHABIStackWalk_h__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+namespace mozilla {
+namespace baseprofiler {
+
+void EHABIStackWalkInit();
+
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+ void** aPCs, size_t aNumFrames);
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif
diff --git a/mozglue/baseprofiler/core/PageInformation.cpp b/mozglue/baseprofiler/core/PageInformation.cpp
new file mode 100644
index 0000000000..bb7c7872aa
--- /dev/null
+++ b/mozglue/baseprofiler/core/PageInformation.cpp
@@ -0,0 +1,49 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PageInformation.h"
+
+#include "BaseProfiler.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+PageInformation::PageInformation(uint64_t aTabID, uint64_t aInnerWindowID,
+ const std::string& aUrl,
+ uint64_t aEmbedderInnerWindowID)
+ : mTabID(aTabID),
+ mInnerWindowID(aInnerWindowID),
+ mUrl(aUrl),
+ mEmbedderInnerWindowID(aEmbedderInnerWindowID),
+ mRefCnt(0) {}
+
+bool PageInformation::Equals(PageInformation* aOtherPageInfo) const {
+ // It's enough to check inner window IDs because they are unique for each
+ // page. Therefore, we don't have to check the tab ID or url.
+ return InnerWindowID() == aOtherPageInfo->InnerWindowID();
+}
+
+void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const {
+ aWriter.StartObjectElement();
+ // Here, we are converting uint64_t to double. Both tab and Inner
+ // Window IDs are created using `nsContentUtils::GenerateProcessSpecificId`,
+ // which is specifically designed to only use 53 of the 64 bits to be lossless
+ // when passed into and out of JS as a double.
+ aWriter.DoubleProperty("tabID", TabID());
+ aWriter.DoubleProperty("innerWindowID", InnerWindowID());
+ aWriter.StringProperty("url", Url());
+ aWriter.DoubleProperty("embedderInnerWindowID", EmbedderInnerWindowID());
+ aWriter.EndObject();
+}
+
+size_t PageInformation::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+ return aMallocSizeOf(this);
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/PageInformation.h b/mozglue/baseprofiler/core/PageInformation.h
new file mode 100644
index 0000000000..41808877b3
--- /dev/null
+++ b/mozglue/baseprofiler/core/PageInformation.h
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PageInformation_h
+#define PageInformation_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class SpliceableJSONWriter;
+
+// This class contains information that's relevant to a single page only
+// while the page information is important and registered with the profiler,
+// but regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+// When the page gets unregistered, we keep the profiler buffer position
+// to determine if we are still using this page. If not, we unregister
+// it in the next page registration.
+class PageInformation final {
+ public:
+ PageInformation(uint64_t aTabID, uint64_t aInnerWindowID,
+ const std::string& aUrl, uint64_t aEmbedderInnerWindowID);
+
+ // Using hand-rolled ref-counting, because RefCounted.h macros don't produce
+ // the same code between mozglue and libxul, see bug 1536656.
+ MFBT_API void AddRef() const { ++mRefCnt; }
+  MFBT_API void Release() const {  // Drops one reference taken by AddRef().
+    MOZ_ASSERT(int32_t(mRefCnt) > 0);  // Must still hold at least one ref.
+    if (--mRefCnt == 0) {  // Destroy only when the LAST reference is gone.
+      delete this;
+    }
+  }
+
+ size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+ bool Equals(PageInformation* aOtherPageInfo) const;
+ void StreamJSON(SpliceableJSONWriter& aWriter) const;
+
+ uint64_t InnerWindowID() const { return mInnerWindowID; }
+ uint64_t TabID() const { return mTabID; }
+ const std::string& Url() const { return mUrl; }
+ uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; }
+
+ Maybe<uint64_t> BufferPositionWhenUnregistered() const {
+ return mBufferPositionWhenUnregistered;
+ }
+
+ void NotifyUnregistered(uint64_t aBufferPosition) {
+ mBufferPositionWhenUnregistered = Some(aBufferPosition);
+ }
+
+ private:
+ const uint64_t mTabID;
+ const uint64_t mInnerWindowID;
+ const std::string mUrl;
+ const uint64_t mEmbedderInnerWindowID;
+
+ // Holds the buffer position when page is unregistered.
+ // It's used to determine if we still use this page in the profiler or
+ // not.
+ Maybe<uint64_t> mBufferPositionWhenUnregistered;
+
+ mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // PageInformation_h
diff --git a/mozglue/baseprofiler/core/PlatformMacros.h b/mozglue/baseprofiler/core/PlatformMacros.h
new file mode 100644
index 0000000000..c72e94c128
--- /dev/null
+++ b/mozglue/baseprofiler/core/PlatformMacros.h
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PLATFORM_MACROS_H
+#define PLATFORM_MACROS_H
+
+// Define platform selection macros in a consistent way. Don't add anything
+// else to this file, so it can remain freestanding. The primary factorisation
+// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined
+// too, since they are sometimes convenient.
+//
+// Note: "GP" is short for "Gecko Profiler".
+
+#undef GP_PLAT_x86_android
+#undef GP_PLAT_amd64_android
+#undef GP_PLAT_arm_android
+#undef GP_PLAT_arm64_android
+#undef GP_PLAT_x86_linux
+#undef GP_PLAT_amd64_linux
+#undef GP_PLAT_arm_linux
+#undef GP_PLAT_mips64_linux
+#undef GP_PLAT_amd64_darwin
+#undef GP_PLAT_arm64_darwin
+#undef GP_PLAT_x86_windows
+#undef GP_PLAT_amd64_windows
+#undef GP_PLAT_arm64_windows
+
+#undef GP_ARCH_x86
+#undef GP_ARCH_amd64
+#undef GP_ARCH_arm
+#undef GP_ARCH_arm64
+#undef GP_ARCH_mips64
+
+#undef GP_OS_android
+#undef GP_OS_linux
+#undef GP_OS_darwin
+#undef GP_OS_windows
+
+// We test __ANDROID__ before __linux__ because __linux__ is defined on both
+// Android and Linux, whereas GP_OS_android is not defined on vanilla Linux.
+
+#if defined(__ANDROID__) && defined(__i386__)
+# define GP_PLAT_x86_android 1
+# define GP_ARCH_x86 1
+# define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__x86_64__)
+# define GP_PLAT_amd64_android 1
+# define GP_ARCH_amd64 1
+# define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+# define GP_PLAT_arm_android 1
+# define GP_ARCH_arm 1
+# define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__aarch64__)
+# define GP_PLAT_arm64_android 1
+# define GP_ARCH_arm64 1
+# define GP_OS_android 1
+
+#elif defined(__linux__) && defined(__i386__)
+# define GP_PLAT_x86_linux 1
+# define GP_ARCH_x86 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__x86_64__)
+# define GP_PLAT_amd64_linux 1
+# define GP_ARCH_amd64 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__arm__)
+# define GP_PLAT_arm_linux 1
+# define GP_ARCH_arm 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__aarch64__)
+# define GP_PLAT_arm64_linux 1
+# define GP_ARCH_arm64 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__mips64)
+# define GP_PLAT_mips64_linux 1
+# define GP_ARCH_mips64 1
+# define GP_OS_linux 1
+
+#elif defined(__APPLE__) && defined(__aarch64__)
+# define GP_PLAT_arm64_darwin 1
+# define GP_ARCH_arm64 1
+# define GP_OS_darwin 1
+
+#elif defined(__APPLE__) && defined(__x86_64__)
+# define GP_PLAT_amd64_darwin 1
+# define GP_ARCH_amd64 1
+# define GP_OS_darwin 1
+
+#elif defined(__FreeBSD__) && defined(__x86_64__)
+# define GP_PLAT_amd64_freebsd 1
+# define GP_ARCH_amd64 1
+# define GP_OS_freebsd 1
+
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+# define GP_PLAT_arm64_freebsd 1
+# define GP_ARCH_arm64 1
+# define GP_OS_freebsd 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+ (defined(_M_IX86) || defined(__i386__))
+# define GP_PLAT_x86_windows 1
+# define GP_ARCH_x86 1
+# define GP_OS_windows 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+ (defined(_M_X64) || defined(__x86_64__))
+# define GP_PLAT_amd64_windows 1
+# define GP_ARCH_amd64 1
+# define GP_OS_windows 1
+
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+# define GP_PLAT_arm64_windows 1
+# define GP_ARCH_arm64 1
+# define GP_OS_windows 1
+
+#else
+# error "Unsupported platform"
+#endif
+
+#endif /* ndef PLATFORM_MACROS_H */
diff --git a/mozglue/baseprofiler/core/ProfileBuffer.cpp b/mozglue/baseprofiler/core/ProfileBuffer.cpp
new file mode 100644
index 0000000000..dd5504274b
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBuffer.cpp
@@ -0,0 +1,217 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBuffer.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer)
+ : mEntries(aBuffer) {
+ // Assume the given buffer is in-session.
+ MOZ_ASSERT(mEntries.IsInSession());
+}
+
+/* static */
+ProfileBufferBlockIndex ProfileBuffer::AddEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer,
+ const ProfileBufferEntry& aEntry) {
+ switch (aEntry.GetKind()) {
+#define SWITCH_KIND(KIND, TYPE, SIZE) \
+ case ProfileBufferEntry::Kind::KIND: { \
+ return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \
+ break; \
+ }
+
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND)
+
+#undef SWITCH_KIND
+ default:
+ MOZ_ASSERT(false, "Unhandled baseprofiler::ProfilerBuffer entry KIND");
+ return ProfileBufferBlockIndex{};
+ }
+}
+
+// Called from signal, call only reentrant functions
+uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) {
+ return AddEntry(mEntries, aEntry).ConvertToProfileBufferIndex();
+}
+
+/* static */
+ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer,
+ BaseProfilerThreadId aThreadId) {
+ return AddEntry(aProfileChunkedBuffer,
+ ProfileBufferEntry::ThreadId(aThreadId));
+}
+
+uint64_t ProfileBuffer::AddThreadIdEntry(BaseProfilerThreadId aThreadId) {
+ return AddThreadIdEntry(mEntries, aThreadId).ConvertToProfileBufferIndex();
+}
+
+void ProfileBuffer::CollectCodeLocation(
+ const char* aLabel, const char* aStr, uint32_t aFrameFlags,
+ uint64_t aInnerWindowID, const Maybe<uint32_t>& aLineNumber,
+ const Maybe<uint32_t>& aColumnNumber,
+ const Maybe<ProfilingCategoryPair>& aCategoryPair) {
+ AddEntry(ProfileBufferEntry::Label(aLabel));
+ AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags)));
+
+ if (aStr) {
+ // Store the string using one or more DynamicStringFragment entries.
+ size_t strLen = strlen(aStr) + 1; // +1 for the null terminator
+ // If larger than the prescribed limit, we will cut the string and end it
+ // with an ellipsis.
+ const bool tooBig = strLen > kMaxFrameKeyLength;
+ if (tooBig) {
+ strLen = kMaxFrameKeyLength;
+ }
+ char chars[ProfileBufferEntry::kNumChars];
+ for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) {
+ // Store up to kNumChars characters in the entry.
+ size_t len = ProfileBufferEntry::kNumChars;
+ const bool last = j + len >= strLen;
+ if (last) {
+ // Only the last entry may be smaller than kNumChars.
+ len = strLen - j;
+ if (tooBig) {
+ // That last entry is part of a too-big string, replace the end
+ // characters with an ellipsis "...".
+ len = std::max(len, size_t(4));
+ chars[len - 4] = '.';
+ chars[len - 3] = '.';
+ chars[len - 2] = '.';
+ chars[len - 1] = '\0';
+ // Make sure the memcpy will not overwrite our ellipsis!
+ len -= 4;
+ }
+ }
+ memcpy(chars, &aStr[j], len);
+ AddEntry(ProfileBufferEntry::DynamicStringFragment(chars));
+ if (last) {
+ break;
+ }
+ }
+ }
+
+ if (aInnerWindowID) {
+ AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID));
+ }
+
+ if (aLineNumber) {
+ AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber));
+ }
+
+ if (aColumnNumber) {
+ AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber));
+ }
+
+ if (aCategoryPair.isSome()) {
+ AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair)));
+ }
+}
+
+size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+ // Measurement of the following members may be added later if DMD finds it
+ // is worthwhile:
+ // - memory pointed to by the elements within mEntries
+ return mEntries.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+ return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+}
+
+void ProfileBuffer::CollectOverheadStats(TimeDuration aSamplingTime,
+ TimeDuration aLocking,
+ TimeDuration aCleaning,
+ TimeDuration aCounters,
+ TimeDuration aThreads) {
+ double timeUs = aSamplingTime.ToMilliseconds() * 1000.0;
+ if (mFirstSamplingTimeUs == 0.0) {
+ mFirstSamplingTimeUs = timeUs;
+ } else {
+ // Note that we'll have 1 fewer interval than other numbers (because
+ // we need both ends of an interval to know its duration). The final
+ // difference should be insignificant over the expected many thousands
+ // of iterations.
+ mIntervalsUs.Count(timeUs - mLastSamplingTimeUs);
+ }
+ mLastSamplingTimeUs = timeUs;
+ // Time to take the lock before sampling.
+ double lockingUs = aLocking.ToMilliseconds() * 1000.0;
+ // Time to discard expired data.
+ double cleaningUs = aCleaning.ToMilliseconds() * 1000.0;
+ // Time to gather all counters.
+ double countersUs = aCounters.ToMilliseconds() * 1000.0;
+ // Time to sample all threads.
+ double threadsUs = aThreads.ToMilliseconds() * 1000.0;
+
+ // Add to our gathered stats.
+ mOverheadsUs.Count(lockingUs + cleaningUs + countersUs + threadsUs);
+ mLockingsUs.Count(lockingUs);
+ mCleaningsUs.Count(cleaningUs);
+ mCountersUs.Count(countersUs);
+ mThreadsUs.Count(threadsUs);
+
+ // Record details in buffer, if requested.
+ static const bool sRecordSamplingOverhead = []() {
+ const char* recordOverheads = getenv("MOZ_PROFILER_RECORD_OVERHEADS");
+ return recordOverheads && recordOverheads[0] != '\0';
+ }();
+ if (sRecordSamplingOverhead) {
+ AddEntry(ProfileBufferEntry::ProfilerOverheadTime(timeUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(lockingUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleaningUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(countersUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threadsUs));
+ }
+}
+
+ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const {
+ return {BufferRangeStart(),
+ BufferRangeEnd(),
+ static_cast<uint32_t>(*mEntries.BufferLength() /
+ 8), // 8 bytes per entry.
+ mIntervalsUs,
+ mOverheadsUs,
+ mLockingsUs,
+ mCleaningsUs,
+ mCountersUs,
+ mThreadsUs};
+}
+
+/* ProfileBufferCollector */
+
+void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) {
+ mBuf.AddEntry(ProfileBufferEntry::NativeLeafAddr(aAddr));
+}
+
+void ProfileBufferCollector::CollectProfilingStackFrame(
+ const ProfilingStackFrame& aFrame) {
+ // WARNING: this function runs within the profiler's "critical section".
+
+ MOZ_ASSERT(aFrame.isLabelFrame() ||
+ (aFrame.isJsFrame() && !aFrame.isOSRFrame()));
+
+ const char* label = aFrame.label();
+ const char* dynamicString = aFrame.dynamicString();
+ Maybe<uint32_t> line;
+ Maybe<uint32_t> column;
+
+ MOZ_ASSERT(aFrame.isLabelFrame());
+
+ mBuf.CollectCodeLocation(label, dynamicString, aFrame.flags(),
+ aFrame.realmID(), line, column,
+ Some(aFrame.categoryPair()));
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBuffer.h b/mozglue/baseprofiler/core/ProfileBuffer.h
new file mode 100644
index 0000000000..f77b429df8
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBuffer.h
@@ -0,0 +1,211 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_BUFFER_H
+#define MOZ_PROFILE_BUFFER_H
+
+#include "ProfileBufferEntry.h"
+
+#include "BaseProfiler.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Class storing most profiling data in a ProfileChunkedBuffer.
+//
+// This class is used as a queue of entries which, after construction, never
+// allocates. This makes it safe to use in the profiler's "critical section".
+class ProfileBuffer final {
+ public:
+ // ProfileBuffer constructor
+ // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer
+ // manager.
+ explicit ProfileBuffer(ProfileChunkedBuffer& aBuffer);
+
+ ProfileChunkedBuffer& UnderlyingChunkedBuffer() const { return mEntries; }
+
+ bool IsThreadSafe() const { return mEntries.IsThreadSafe(); }
+
+ // Add |aEntry| to the buffer, ignoring what kind of entry it is.
+ // Returns the position of the entry.
+ uint64_t AddEntry(const ProfileBufferEntry& aEntry);
+
+ // Add to the buffer a sample start (ThreadId) entry for aThreadId.
+ // Returns the position of the entry.
+ uint64_t AddThreadIdEntry(BaseProfilerThreadId aThreadId);
+
+ void CollectCodeLocation(const char* aLabel, const char* aStr,
+ uint32_t aFrameFlags, uint64_t aInnerWindowID,
+ const Maybe<uint32_t>& aLineNumber,
+ const Maybe<uint32_t>& aColumnNumber,
+ const Maybe<ProfilingCategoryPair>& aCategoryPair);
+
+ // Maximum size of a frameKey string that we'll handle.
+ static const size_t kMaxFrameKeyLength = 512;
+
+ // Stream JSON for samples in the buffer to aWriter, using the supplied
+ // UniqueStacks object.
+ // Only streams samples for the given thread ID and which were taken at or
+ // after aSinceTime. If aThreadId is unspecified, ignore the stored thread ID;
+ // this should only be used when the buffer contains only one sample.
+ // Return the thread ID of the last streamed sample, or a default-constructed ID.
+ BaseProfilerThreadId StreamSamplesToJSON(SpliceableJSONWriter& aWriter,
+ BaseProfilerThreadId aThreadId,
+ double aSinceTime,
+ UniqueStacks& aUniqueStacks) const;
+
+ void StreamMarkersToJSON(SpliceableJSONWriter& aWriter,
+ BaseProfilerThreadId aThreadId,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime,
+ UniqueStacks& aUniqueStacks) const;
+ void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+ double aSinceTime) const;
+ void StreamProfilerOverheadToJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime) const;
+ void StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime) const;
+
+ // Find (via |aLastSample|) the most recent sample for the thread denoted by
+ // |aThreadId| and clone it, patching in the current time as appropriate.
+ // Mutate |aLastSample| to point to the newly inserted sample.
+ // Returns whether duplication was successful.
+ bool DuplicateLastSample(BaseProfilerThreadId aThreadId,
+ const TimeStamp& aProcessStartTime,
+ Maybe<uint64_t>& aLastSample);
+
+ void DiscardSamplesBeforeTime(double aTime);
+
+ size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const;
+ size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+
+ void CollectOverheadStats(TimeDuration aSamplingTime, TimeDuration aLocking,
+ TimeDuration aCleaning, TimeDuration aCounters,
+ TimeDuration aThreads);
+
+ ProfilerBufferInfo GetProfilerBufferInfo() const;
+
+ private:
+ // Add |aEntry| to the provided ProfileChunkedBuffer.
+ // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+ // that is not attached to a `ProfileBuffer`.
+ static ProfileBufferBlockIndex AddEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer,
+ const ProfileBufferEntry& aEntry);
+
+ // Add a sample start (ThreadId) entry for aThreadId to the provided
+ // ProfileChunkedBuffer. Returns the position of the entry.
+ // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+ // that is not attached to a `ProfileBuffer`.
+ static ProfileBufferBlockIndex AddThreadIdEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer,
+ BaseProfilerThreadId aThreadId);
+
+ // The storage in which this ProfileBuffer stores its entries.
+ ProfileChunkedBuffer& mEntries;
+
+ public:
+ // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values
+ // corresponding to the first entry and past the last entry stored in
+ // `mEntries`.
+ //
+ // The returned values are not guaranteed to be stable, because other threads
+ // may also be accessing the buffer concurrently. But they will always
+ // increase, and can therefore give an indication of how far these values have
+ // *at least* reached. In particular:
+ // - Entries whose index is strictly less than `BufferRangeStart()` have been
+ // discarded by now, so any related data may also be safely discarded.
+ // - It is safe to try and read entries at any index strictly less than
+ // `BufferRangeEnd()` -- but note that these reads may fail by the time you
+ // request them, as old entries get overwritten by new ones.
+ uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; }
+ uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; }
+
+ private:
+ // Single pre-allocated chunk (to avoid spurious mallocs), used when:
+ // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize).
+ // - Adding JIT info.
+ // - Streaming stacks to JSON.
+ // Mutable because it's accessed from non-multithreaded const methods.
+ mutable Maybe<ProfileBufferChunkManagerSingle> mMaybeWorkerChunkManager;
+ ProfileBufferChunkManagerSingle& WorkerChunkManager() const {
+ if (mMaybeWorkerChunkManager.isNothing()) {
+ // Only actually allocate it on first use. (Some ProfileBuffers are
+ // temporary and don't actually need this.)
+ mMaybeWorkerChunkManager.emplace(
+ ProfileBufferChunk::SizeofChunkMetadata() +
+ ProfileBufferChunkManager::scExpectedMaximumStackSize);
+ }
+ return *mMaybeWorkerChunkManager;
+ }
+
+ // Time from launch (us) when first sampling was recorded.
+ double mFirstSamplingTimeUs = 0.0;
+ // Time from launch (us) when last sampling was recorded.
+ double mLastSamplingTimeUs = 0.0;
+ // Sampling stats: Interval (us) between successive samplings.
+ ProfilerStats mIntervalsUs;
+ // Sampling stats: Total duration (us) of each sampling. (Split detail below.)
+ ProfilerStats mOverheadsUs;
+ // Sampling stats: Time (us) to acquire the lock before sampling.
+ ProfilerStats mLockingsUs;
+ // Sampling stats: Time (us) to discard expired data.
+ ProfilerStats mCleaningsUs;
+ // Sampling stats: Time (us) to collect counter data.
+ ProfilerStats mCountersUs;
+ // Sampling stats: Time (us) to sample thread stacks.
+ ProfilerStats mThreadsUs;
+};
+
+/**
+ * Helper type used to implement ProfilerStackCollector. This type is used as
+ * the collector for MergeStacks by ProfileBuffer. It holds a reference to the
+ * buffer, as well as the sample position and buffer range start that were
+ * given at construction and are reported back through the accessors below.
+ */
+class ProfileBufferCollector final : public ProfilerStackCollector {
+ public:
+ ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos,
+ uint64_t aBufferRangeStart)
+ : mBuf(aBuf),
+ mSamplePositionInBuffer(aSamplePos),
+ mBufferRangeStart(aBufferRangeStart) {
+ MOZ_ASSERT(
+ mSamplePositionInBuffer >= mBufferRangeStart,
+ "The sample position should always be after the buffer range start");
+ }
+
+ // Position at which the sample starts in the profiler buffer (which may be
+ // different from the buffer in which the sample data is collected here).
+ Maybe<uint64_t> SamplePositionInBuffer() override {
+ return Some(mSamplePositionInBuffer);
+ }
+
+ // Profiler buffer's range start (which may be different from the buffer in
+ // which the sample data is collected here).
+ Maybe<uint64_t> BufferRangeStart() override {
+ return Some(mBufferRangeStart);
+ }
+
+ virtual void CollectNativeLeafAddr(void* aAddr) override;
+ virtual void CollectProfilingStackFrame(
+ const ProfilingStackFrame& aFrame) override;
+
+ private:
+ ProfileBuffer& mBuf;
+ uint64_t mSamplePositionInBuffer;
+ uint64_t mBufferRangeStart;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.cpp b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp
new file mode 100644
index 0000000000..ab30b52cee
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp
@@ -0,0 +1,1341 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBufferEntry.h"
+
+#include <ostream>
+#include <type_traits>
+
+#include "mozilla/Logging.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/StackWalk.h"
+
+#include "BaseProfiler.h"
+#include "mozilla/BaseProfilerMarkers.h"
+#include "platform.h"
+#include "ProfileBuffer.h"
+#include "ProfilerBacktrace.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN ProfileBufferEntry
+
+ProfileBufferEntry::ProfileBufferEntry()
+ : mKind(Kind::INVALID), mStorage{0, 0, 0, 0, 0, 0, 0, 0} {}
+
+// Only the pointer value is copied into mStorage, so aString must be a static string.
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString)
+ : mKind(aKind) {
+ memcpy(mStorage, &aString, sizeof(aString));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars])
+ : mKind(aKind) {
+ memcpy(mStorage, aChars, kNumChars);  // Copies exactly kNumChars bytes, terminator or not.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) {
+ memcpy(mStorage, &aPtr, sizeof(aPtr));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble)
+ : mKind(aKind) {
+ memcpy(mStorage, &aDouble, sizeof(aDouble));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) {
+ memcpy(mStorage, &aInt, sizeof(aInt));  // Writes only sizeof(int) bytes of mStorage.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64)
+ : mKind(aKind) {
+ memcpy(mStorage, &aInt64, sizeof(aInt64));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64)
+ : mKind(aKind) {
+ memcpy(mStorage, &aUint64, sizeof(aUint64));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind,
+ BaseProfilerThreadId aThreadId)
+ : mKind(aKind) {
+ static_assert(std::is_trivially_copyable_v<BaseProfilerThreadId>);
+ static_assert(sizeof(aThreadId) <= sizeof(mStorage));
+ memcpy(mStorage, &aThreadId, sizeof(aThreadId));
+}
+
+const char* ProfileBufferEntry::GetString() const {  // Reads back the pointer stored by the (Kind, const char*) constructor.
+ const char* result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+void* ProfileBufferEntry::GetPtr() const {
+ void* result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+double ProfileBufferEntry::GetDouble() const {
+ double result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+int ProfileBufferEntry::GetInt() const {
+ int result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+int64_t ProfileBufferEntry::GetInt64() const {
+ int64_t result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+uint64_t ProfileBufferEntry::GetUint64() const {
+ uint64_t result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+BaseProfilerThreadId ProfileBufferEntry::GetThreadId() const {
+ BaseProfilerThreadId result;
+ static_assert(std::is_trivially_copyable_v<BaseProfilerThreadId>);
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const {
+ memcpy(aOutArray, mStorage, kNumChars);  // Raw copy of kNumChars bytes; no terminator is appended.
+}
+
+// END ProfileBufferEntry
+////////////////////////////////////////////////////////////////////////
+
+// As mentioned in ProfileBufferEntry.h, the JSON format contains many
+// arrays whose elements are laid out according to various schemas to help
+// de-duplication. This RAII class helps write these arrays by keeping track of
+// the last non-null element written and adding the appropriate number of null
+// elements when writing new non-null elements. It also automatically opens and
+// closes an array element on the given JSON writer.
+//
+// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter and
+// the UniqueJSONStrings objects for the lifetime of AutoArraySchemaWriter. Do
+// not access them independently while the AutoArraySchemaWriter is alive.
+// If you need to add complex objects, call FreeFormElement(), which will give
+// you temporary access to the writer.
+//
+// Example usage:
+//
+// // Define the schema of elements in this type of array: [FOO, BAR, BAZ]
+// enum Schema : uint32_t {
+// FOO = 0,
+// BAR = 1,
+// BAZ = 2
+// };
+//
+// AutoArraySchemaWriter writer(someJsonWriter);
+// if (shouldWriteFoo) {
+// writer.IntElement(FOO, getFoo());
+// }
+// ... etc ...
+//
+// The elements need to be added in-order.
+class MOZ_RAII AutoArraySchemaWriter {
+ public:
+ explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter)
+ : mJSONWriter(aWriter), mNextFreeIndex(0) {
+ mJSONWriter.StartArrayElement();  // Opened here; closed by the destructor.
+ }
+
+ ~AutoArraySchemaWriter() { mJSONWriter.EndArray(); }
+
+ template <typename T>
+ void IntElement(uint32_t aIndex, T aValue) {
+ static_assert(!std::is_same_v<T, uint64_t>,
+ "Narrowing uint64 -> int64 conversion not allowed");
+ FillUpTo(aIndex);
+ mJSONWriter.IntElement(static_cast<int64_t>(aValue));
+ }
+
+ void DoubleElement(uint32_t aIndex, double aValue) {
+ FillUpTo(aIndex);
+ mJSONWriter.DoubleElement(aValue);
+ }
+
+ void TimeMsElement(uint32_t aIndex, double aTime_ms) {
+ FillUpTo(aIndex);
+ mJSONWriter.TimeDoubleMsElement(aTime_ms);
+ }
+
+ void BoolElement(uint32_t aIndex, bool aValue) {
+ FillUpTo(aIndex);
+ mJSONWriter.BoolElement(aValue);
+ }
+
+ protected:
+ SpliceableJSONWriter& Writer() { return mJSONWriter; }
+
+ void FillUpTo(uint32_t aIndex) {
+ MOZ_ASSERT(aIndex >= mNextFreeIndex);  // Elements must be added in increasing index order.
+ mJSONWriter.NullElements(aIndex - mNextFreeIndex);  // Null-pad the schema slots skipped since the last element.
+ mNextFreeIndex = aIndex + 1;
+ }
+
+ private:
+ SpliceableJSONWriter& mJSONWriter;
+ uint32_t mNextFreeIndex;  // Schema index of the next element that may be written.
+};
+
+// Same as AutoArraySchemaWriter, but this can also write strings (output as
+// indexes into the table of unique strings).
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter {
+ public:
+ AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter,
+ UniqueJSONStrings& aStrings)
+ : AutoArraySchemaWriter(aWriter), mStrings(aStrings) {}
+
+ void StringElement(uint32_t aIndex, const Span<const char>& aValue) {
+ FillUpTo(aIndex);
+ mStrings.WriteElement(Writer(), aValue);  // The string is written through the UniqueJSONStrings table.
+ }
+
+ private:
+ UniqueJSONStrings& mStrings;
+};
+
+UniqueStacks::StackKey UniqueStacks::BeginStack(const FrameKey& aFrame) {  // Key built from aFrame's index alone.
+ return StackKey(GetOrAddFrameIndex(aFrame));
+}
+
+UniqueStacks::StackKey UniqueStacks::AppendFrame(const StackKey& aStack,
+ const FrameKey& aFrame) {
+ return StackKey(aStack, GetOrAddStackIndex(aStack),  // Registers aStack itself if it has no index yet.
+ GetOrAddFrameIndex(aFrame));
+}
+
+bool UniqueStacks::FrameKey::NormalFrameData::operator==(
+ const NormalFrameData& aOther) const {  // Compares location, JS relevance, inner window ID, line, column and category pair.
+ return mLocation == aOther.mLocation &&
+ mRelevantForJS == aOther.mRelevantForJS &&
+ mInnerWindowID == aOther.mInnerWindowID && mLine == aOther.mLine &&
+ mColumn == aOther.mColumn && mCategoryPair == aOther.mCategoryPair;
+}
+
+UniqueStacks::UniqueStacks()
+ : mUniqueStrings(MakeUnique<UniqueJSONStrings>(
+ FailureLatchInfallibleSource::Singleton())),
+ mFrameTableWriter(FailureLatchInfallibleSource::Singleton()),
+ mStackTableWriter(FailureLatchInfallibleSource::Singleton()) {
+ mFrameTableWriter.StartBareList();  // Matched by EndBareList() in SpliceFrameTableElements().
+ mStackTableWriter.StartBareList();  // Matched by EndBareList() in SpliceStackTableElements().
+}
+
+uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) {
+ uint32_t count = mStackToIndexMap.count();  // Current map size; becomes the index of a newly added stack.
+ auto entry = mStackToIndexMap.lookupForAdd(aStack);
+ if (entry) {
+ MOZ_ASSERT(entry->value() < count);
+ return entry->value();
+ }
+
+ MOZ_RELEASE_ASSERT(mStackToIndexMap.add(entry, aStack, count));
+ StreamStack(aStack);  // A stack is streamed once, when it is first added.
+ return count;
+}
+
+uint32_t UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) {
+ uint32_t count = mFrameToIndexMap.count();  // Current map size; becomes the index of a newly added frame.
+ auto entry = mFrameToIndexMap.lookupForAdd(aFrame);
+ if (entry) {
+ MOZ_ASSERT(entry->value() < count);
+ return entry->value();
+ }
+
+ MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(entry, aFrame, count));
+ StreamNonJITFrame(aFrame);  // A frame is streamed once, when it is first added.
+ return count;
+}
+
+void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) {
+ mFrameTableWriter.EndBareList();  // Closes the bare list started in the constructor.
+ aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) {
+ mStackTableWriter.EndBareList();  // Closes the bare list started in the constructor.
+ aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::StreamStack(const StackKey& aStack) {
+ enum Schema : uint32_t { PREFIX = 0, FRAME = 1 };
+
+ AutoArraySchemaWriter writer(mStackTableWriter);
+ if (aStack.mPrefixStackIndex.isSome()) {
+ writer.IntElement(PREFIX, *aStack.mPrefixStackIndex);
+ }
+ writer.IntElement(FRAME, aStack.mFrameIndex);  // With no prefix, the PREFIX slot is null-padded by the writer.
+}
+
+void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) {
+ using NormalFrameData = FrameKey::NormalFrameData;
+
+ enum Schema : uint32_t {
+ LOCATION = 0,
+ RELEVANT_FOR_JS = 1,
+ INNER_WINDOW_ID = 2,
+ IMPLEMENTATION = 3,  // Never written in this function; null-padded when a later element is present.
+ LINE = 4,
+ COLUMN = 5,
+ CATEGORY = 6,
+ SUBCATEGORY = 7
+ };
+
+ AutoArraySchemaWithStringsWriter writer(mFrameTableWriter, *mUniqueStrings);
+
+ const NormalFrameData& data = aFrame.mData.as<NormalFrameData>();
+ writer.StringElement(LOCATION, data.mLocation);
+ writer.BoolElement(RELEVANT_FOR_JS, data.mRelevantForJS);
+
+ // It's okay to convert uint64_t to double here because DOM always creates IDs
+ // that are convertible to double.
+ writer.DoubleElement(INNER_WINDOW_ID, data.mInnerWindowID);
+
+ if (data.mLine.isSome()) {
+ writer.IntElement(LINE, *data.mLine);
+ }
+ if (data.mColumn.isSome()) {
+ writer.IntElement(COLUMN, *data.mColumn);
+ }
+ if (data.mCategoryPair.isSome()) {
+ const ProfilingCategoryPairInfo& info =
+ GetProfilingCategoryPairInfo(*data.mCategoryPair);
+ writer.IntElement(CATEGORY, uint32_t(info.mCategory));
+ writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex);
+ }
+}
+
+struct ProfileSample {
+ uint32_t mStack;  // Stack index, as returned by UniqueStacks::GetOrAddStackIndex().
+ double mTime;  // Value of the sample's Time entry.
+ Maybe<double> mResponsiveness;  // Set only when a Responsiveness entry follows the frames.
+};
+
+static void WriteSample(SpliceableJSONWriter& aWriter,
+ const ProfileSample& aSample) {
+ enum Schema : uint32_t {
+ STACK = 0,
+ TIME = 1,
+ EVENT_DELAY = 2,  // Holds mResponsiveness, when present.
+ };
+
+ AutoArraySchemaWriter writer(aWriter);
+
+ writer.IntElement(STACK, aSample.mStack);
+
+ writer.TimeMsElement(TIME, aSample.mTime);
+
+ if (aSample.mResponsiveness.isSome()) {
+ writer.DoubleElement(EVENT_DELAY, *aSample.mResponsiveness);
+ }
+}
+
+class EntryGetter {
+ public:
+ explicit EntryGetter(ProfileChunkedBuffer::Reader& aReader,
+ uint64_t aInitialReadPos = 0)
+ : mBlockIt(
+ aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ aInitialReadPos))),
+ mBlockItEnd(aReader.end()) {
+ if (!ReadLegacyOrEnd()) {
+ // Find and read the next non-legacy entry.
+ Next();
+ }
+ }
+
+ bool Has() const { return mBlockIt != mBlockItEnd; }
+
+ const ProfileBufferEntry& Get() const {
+ MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Get()`");
+ return mEntry;
+ }
+
+ void Next() {
+ MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Next()`");
+ for (;;) {
+ ++mBlockIt;
+ if (ReadLegacyOrEnd()) {
+ // Either we're at the end, or we could read a legacy entry -> Done.
+ break;
+ }
+ // Otherwise loop around until we hit the end or a legacy entry.
+ }
+ }
+
+ ProfileBufferBlockIndex CurBlockIndex() const {
+ return mBlockIt.CurrentBlockIndex();
+ }
+
+ uint64_t CurPos() const {
+ return CurBlockIndex().ConvertToProfileBufferIndex();
+ }
+
+ private:
+ // Try to read the entry at the current `mBlockIt` position.
+ // * If we're at the end of the buffer, just return `true`.
+ // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`),
+ // read it into `mEntry`, and return `true` as well.
+ // * Otherwise the entry contains a "modern" type that cannot be read into
+ // `mEntry`, return `false` (so `EntryGetter` can skip to another entry).
+ bool ReadLegacyOrEnd() {
+ if (!Has()) {
+ return true;
+ }
+ // Read the entry "kind", which is always at the start of all entries.
+ ProfileBufferEntryReader aER = *mBlockIt;
+ auto type = static_cast<ProfileBufferEntry::Kind>(
+ aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>());
+ MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) <
+ static_cast<ProfileBufferEntry::KindUnderlyingType>(
+ ProfileBufferEntry::Kind::MODERN_LIMIT));
+ if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) {
+ aER.SetRemainingBytes(0);
+ return false;
+ }
+ // Here, we have a legacy item, we need to read it from the start.
+ // Because the above `ReadObject` moved the reader, we need to reset it to
+ // the start of the entry before reading the whole entry.
+ aER = *mBlockIt;
+ aER.ReadBytes(&mEntry, aER.RemainingBytes());
+ return true;
+ }
+
+ ProfileBufferEntry mEntry;
+ ProfileChunkedBuffer::BlockIterator mBlockIt;
+ const ProfileChunkedBuffer::BlockIterator mBlockItEnd;
+};
+
+// The following grammar shows legal sequences of profile buffer entries.
+// The sequences beginning with a ThreadId entry are known as "samples".
+//
+// (
+// ( /* Samples */
+// ThreadId
+// Time
+// ( NativeLeafAddr
+// | Label FrameFlags? DynamicStringFragment* InnerWindowID? LineNumber? ColumnNumber? CategoryPair?
+// | JitReturnAddr
+// )+
+// Responsiveness?
+// )
+// | MarkerData
+// | ( /* Counters */
+// CounterId
+// Time
+// (
+// CounterKey
+// Count
+// Number?
+// )*
+// )
+// | CollectionStart
+// | CollectionEnd
+// | Pause
+// | Resume
+// | ( ProfilerOverheadTime /* Sampling start timestamp */
+// ProfilerOverheadDuration /* Lock acquisition */
+// ProfilerOverheadDuration /* Expired data cleaning */
+// ProfilerOverheadDuration /* Counters */
+// ProfilerOverheadDuration /* Threads */
+// )
+// )*
+//
+// The most complicated part is the stack entry sequence that begins with
+// Label. Here are some examples.
+//
+// - ProfilingStack frames without a dynamic string:
+//
+// Label("js::RunScript")
+// CategoryPair(ProfilingCategoryPair::JS)
+//
+// Label("XREMain::XRE_main")
+// LineNumber(4660)
+// CategoryPair(ProfilingCategoryPair::OTHER)
+//
+// Label("ElementRestyler::ComputeStyleChangeFor")
+// LineNumber(3003)
+// CategoryPair(ProfilingCategoryPair::CSS)
+//
+// - ProfilingStack frames with a dynamic string:
+//
+// Label("nsObserverService::NotifyObservers")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+// DynamicStringFragment("domwindo")
+// DynamicStringFragment("wopened")
+// LineNumber(291)
+// CategoryPair(ProfilingCategoryPair::OTHER)
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+// DynamicStringFragment("closeWin")
+// DynamicStringFragment("dow (chr")
+// DynamicStringFragment("ome://gl")
+// DynamicStringFragment("obal/con")
+// DynamicStringFragment("tent/glo")
+// DynamicStringFragment("balOverl")
+// DynamicStringFragment("ay.js:5)")
+// DynamicStringFragment("") # this string holds the closing '\0'
+// LineNumber(25)
+// CategoryPair(ProfilingCategoryPair::JS)
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+// DynamicStringFragment("bound (s")
+// DynamicStringFragment("elf-host")
+// DynamicStringFragment("ed:914)")
+// LineNumber(945)
+// CategoryPair(ProfilingCategoryPair::JS)
+//
+// - A profiling stack frame with an overly long dynamic string:
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+// DynamicStringFragment("(too lon")
+// DynamicStringFragment("g)")
+// LineNumber(100)
+// CategoryPair(ProfilingCategoryPair::NETWORK)
+//
+// - A wasm JIT frame:
+//
+// Label("")
+// FrameFlags(uint64_t(0))
+// DynamicStringFragment("wasm-fun")
+// DynamicStringFragment("ction[87")
+// DynamicStringFragment("36] (blo")
+// DynamicStringFragment("b:http:/")
+// DynamicStringFragment("/webasse")
+// DynamicStringFragment("mbly.org")
+// DynamicStringFragment("/3dc5759")
+// DynamicStringFragment("4-ce58-4")
+// DynamicStringFragment("626-975b")
+// DynamicStringFragment("-08ad116")
+// DynamicStringFragment("30bc1:38")
+// DynamicStringFragment("29856)")
+//
+// - A JS frame in a synchronous sample:
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+// DynamicStringFragment("u (https")
+// DynamicStringFragment("://perf-")
+// DynamicStringFragment("html.io/")
+// DynamicStringFragment("ac0da204")
+// DynamicStringFragment("aaa44d75")
+// DynamicStringFragment("a800.bun")
+// DynamicStringFragment("dle.js:2")
+// DynamicStringFragment("5)")
+
+// Because this is a format entirely internal to the Profiler, any parsing
+// error indicates a bug in the ProfileBuffer writing or the parser itself,
+// or possibly flaky hardware.
+#define ERROR_AND_CONTINUE(msg) \
+ { /* Bare block, not do-while: `continue` must reach the caller's loop. */ \
+ fprintf(stderr, "ProfileBuffer parse error: %s\n", msg); \
+ MOZ_ASSERT(false, msg); \
+ continue; \
+ }
+
+BaseProfilerThreadId ProfileBuffer::StreamSamplesToJSON(
+ SpliceableJSONWriter& aWriter, BaseProfilerThreadId aThreadId,
+ double aSinceTime, UniqueStacks& aUniqueStacks) const {
+ UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength);  // Scratch buffer reused for every frame's dynamic string.
+
+ return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader,
+ "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+ "running");
+
+ BaseProfilerThreadId processedThreadId;
+
+ EntryGetter e(*aReader);
+
+ for (;;) {
+ // This block skips entries until we find the start of the next sample.
+ // This is useful in three situations.
+ //
+ // - The circular buffer overwrites old entries, so when we start parsing
+ // we might be in the middle of a sample, and we must skip forward to
+ // the start of the next sample.
+ //
+ // - We skip samples that don't have an appropriate ThreadId or Time.
+ //
+ // - We skip range Pause, Resume, CollectionStart, Counter and
+ // CollectionEnd entries between samples.
+ while (e.Has()) {
+ if (e.Get().IsThreadId()) {
+ break;
+ }
+ e.Next();
+ }
+
+ if (!e.Has()) {
+ break;
+ }
+
+ // Due to the skipping loop above, if we have an entry here it
+ // must be a ThreadId entry.
+ MOZ_ASSERT(e.Get().IsThreadId());
+
+ BaseProfilerThreadId threadId = e.Get().GetThreadId();
+ e.Next();
+
+ // Ignore samples that are for the wrong thread.
+ if (threadId != aThreadId && aThreadId.IsSpecified()) {
+ continue;
+ }
+
+ MOZ_ASSERT(
+ aThreadId.IsSpecified() || !processedThreadId.IsSpecified(),
+ "Unspecified aThreadId should only be used with 1-sample buffer");
+
+ ProfileSample sample;
+
+ if (e.Has() && e.Get().IsTime()) {
+ sample.mTime = e.Get().GetDouble();
+ e.Next();
+
+ // Ignore samples that are too old.
+ if (sample.mTime < aSinceTime) {
+ continue;
+ }
+ } else {
+ ERROR_AND_CONTINUE("expected a Time entry");
+ }
+
+ UniqueStacks::StackKey stack =
+ aUniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)"));
+
+ int numFrames = 0;
+ while (e.Has()) {
+ if (e.Get().IsNativeLeafAddr()) {
+ numFrames++;
+
+ void* pc = e.Get().GetPtr();
+ e.Next();
+
+ static const uint32_t BUF_SIZE = 256;
+ char buf[BUF_SIZE];
+
+ // Bug 753041: We need a double cast here to tell GCC that we don't
+ // want to sign extend 32-bit addresses starting with 0xFXXXXXX.
+ unsigned long long pcULL = (unsigned long long)(uintptr_t)pc;
+ SprintfLiteral(buf, "0x%llx", pcULL);
+
+ // If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we add a local
+ // symbolication description to the PC address. This is off by
+ // default, and mainly intended for local development.
+ static const bool preSymbolicate = []() {
+ const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
+ return symbolicate && symbolicate[0] != '\0';
+ }();
+ if (preSymbolicate) {
+ MozCodeAddressDetails details;
+ if (MozDescribeCodeAddress(pc, &details)) {
+ // Replace \0 terminator with space.
+ const uint32_t pcLen = strlen(buf);
+ buf[pcLen] = ' ';
+ // Add description after space. Note: Using a frame number of 0,
+ // as using `numFrames` wouldn't help here, and would prevent
+ // combining same function calls that happen at different depths.
+ // TODO: Remove unsightly "#00: " if too annoying. :-)
+ MozFormatCodeAddressDetails(
+ buf + pcLen + 1, BUF_SIZE - (pcLen + 1), 0, pc, &details);
+ }
+ }
+
+ stack = aUniqueStacks.AppendFrame(stack, UniqueStacks::FrameKey(buf));
+
+ } else if (e.Get().IsLabel()) {
+ numFrames++;
+
+ const char* label = e.Get().GetString();
+ e.Next();
+
+ using FrameFlags = ProfilingStackFrame::Flags;
+ uint32_t frameFlags = 0;
+ if (e.Has() && e.Get().IsFrameFlags()) {
+ frameFlags = uint32_t(e.Get().GetUint64());
+ e.Next();
+ }
+
+ bool relevantForJS =
+ frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS);
+
+ // Copy potential dynamic string fragments into dynStrBuf, so that
+ // dynStrBuf will then contain the entire dynamic string.
+ size_t i = 0;
+ dynStrBuf[0] = '\0';
+ while (e.Has()) {
+ if (e.Get().IsDynamicStringFragment()) {
+ char chars[ProfileBufferEntry::kNumChars];
+ e.Get().CopyCharsInto(chars);
+ for (char c : chars) {
+ if (i < kMaxFrameKeyLength) {
+ dynStrBuf[i] = c;
+ i++;
+ }
+ }
+ e.Next();
+ } else {
+ break;
+ }
+ }
+ dynStrBuf[kMaxFrameKeyLength - 1] = '\0';  // Force termination in case the fragments filled the buffer.
+ bool hasDynamicString = (i != 0);
+
+ std::string frameLabel;
+ if (label[0] != '\0' && hasDynamicString) {
+ if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) {
+ frameLabel += label;
+ frameLabel += '.';
+ frameLabel += dynStrBuf.get();
+ } else if (frameFlags &
+ uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) {
+ frameLabel += "get ";
+ frameLabel += label;
+ frameLabel += '.';
+ frameLabel += dynStrBuf.get();
+ } else if (frameFlags &
+ uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) {
+ frameLabel += "set ";
+ frameLabel += label;
+ frameLabel += '.';
+ frameLabel += dynStrBuf.get();
+ } else {
+ frameLabel += label;
+ frameLabel += ' ';
+ frameLabel += dynStrBuf.get();
+ }
+ } else if (hasDynamicString) {
+ frameLabel += dynStrBuf.get();
+ } else {
+ frameLabel += label;
+ }
+
+ uint64_t innerWindowID = 0;
+ if (e.Has() && e.Get().IsInnerWindowID()) {
+ innerWindowID = uint64_t(e.Get().GetUint64());
+ e.Next();
+ }
+
+ Maybe<unsigned> line;
+ if (e.Has() && e.Get().IsLineNumber()) {
+ line = Some(unsigned(e.Get().GetInt()));
+ e.Next();
+ }
+
+ Maybe<unsigned> column;
+ if (e.Has() && e.Get().IsColumnNumber()) {
+ column = Some(unsigned(e.Get().GetInt()));
+ e.Next();
+ }
+
+ Maybe<ProfilingCategoryPair> categoryPair;
+ if (e.Has() && e.Get().IsCategoryPair()) {
+ categoryPair =
+ Some(ProfilingCategoryPair(uint32_t(e.Get().GetInt())));
+ e.Next();
+ }
+
+ stack = aUniqueStacks.AppendFrame(
+ stack, UniqueStacks::FrameKey(std::move(frameLabel),
+ relevantForJS, innerWindowID, line,
+ column, categoryPair));
+
+ } else {
+ break;
+ }
+ }
+
+ if (numFrames == 0) {
+ // It is possible to have empty stacks if native stackwalking is
+ // disabled. Skip samples with empty stacks. (See Bug 1497985).
+ // Thus, don't use ERROR_AND_CONTINUE, but just continue.
+ continue;
+ }
+
+ sample.mStack = aUniqueStacks.GetOrAddStackIndex(stack);
+
+ if (e.Has() && e.Get().IsResponsiveness()) {
+ sample.mResponsiveness = Some(e.Get().GetDouble());
+ e.Next();
+ }
+
+ WriteSample(aWriter, sample);
+
+ processedThreadId = threadId;
+ }
+
+ return processedThreadId;
+ });
+}
+
+// Streams the markers recorded for thread `aThreadId` into `aWriter`.
+//
+// Every buffer entry begins with its kind. Marker entries are passed on to
+// the marker deserializer; any other entry is skipped in one go. The
+// `aProcessStartTime` and `aSinceTime` parameters are not referenced here;
+// marker stacks are streamed relative to TimeStamp::ProcessCreation().
+void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter,
+                                        BaseProfilerThreadId aThreadId,
+                                        const TimeStamp& aProcessStartTime,
+                                        double aSinceTime,
+                                        UniqueStacks& aUniqueStacks) const {
+  using KindInt = ProfileBufferEntry::KindUnderlyingType;
+
+  mEntries.ReadEach([&](ProfileBufferEntryReader& aER) {
+    const auto entryKind =
+        static_cast<ProfileBufferEntry::Kind>(aER.ReadObject<KindInt>());
+    MOZ_ASSERT(static_cast<KindInt>(entryKind) <
+               static_cast<KindInt>(ProfileBufferEntry::Kind::MODERN_LIMIT));
+
+    if (entryKind != ProfileBufferEntry::Kind::Marker) {
+      // The entry was not a marker, we need to skip to the end.
+      aER.SetRemainingBytes(0);
+      return;
+    }
+
+    ::mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream(
+        aER,
+        // Only markers recorded on the requested thread get a writer.
+        [&](const BaseProfilerThreadId& aMarkerThreadId) {
+          return (aMarkerThreadId == aThreadId) ? &aWriter : nullptr;
+        },
+        // A stack attached to the marker arrives as its own chunked buffer.
+        [&](ProfileChunkedBuffer& aChunkedBuffer) {
+          ProfilerBacktrace backtrace("", &aChunkedBuffer);
+          backtrace.StreamJSON(aWriter, TimeStamp::ProcessCreation(),
+                               aUniqueStacks);
+        },
+        // We don't have Rust markers in the mozglue.
+        [&](mozilla::base_profiler_markers_detail::Streaming::
+                DeserializerTag) {
+          MOZ_ASSERT_UNREACHABLE("No Rust markers in mozglue.");
+        });
+  });
+}
+
+void ProfileBuffer::StreamProfilerOverheadToJSON(
+ SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime,
+ double aSinceTime) const {
+ mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader,
+ "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+ "running");
+ /* Writes the "profilerOverhead" object: a "samples" table with one row per
+  * sampling-overhead record whose time is >= aSinceTime, followed by a
+  * "statistics" object when at least one interval between two samplings
+  * was measured. aProcessStartTime is not referenced in this function. */
+ EntryGetter e(*aReader);
+ // Column index of each value in a "data" row; same order as the schema below.
+ enum Schema : uint32_t {
+ TIME = 0,
+ LOCKING = 1,
+ MARKER_CLEANING = 2,
+ COUNTERS = 3,
+ THREADS = 4
+ };
+
+ aWriter.StartObjectProperty("profilerOverhead");
+ aWriter.StartObjectProperty("samples");
+ // Stream all sampling overhead data. We skip other entries, because we
+ // process them in StreamSamplesToJSON()/etc.
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("time");
+ schema.WriteField("locking");
+ schema.WriteField("expiredMarkerCleaning");
+ schema.WriteField("counters");
+ schema.WriteField("threads");
+ }
+
+ aWriter.StartArrayProperty("data");
+ double firstTime = 0.0;
+ double lastTime = 0.0;
+ ProfilerStats intervals, overheads, lockings, cleanings, counters, threads;
+ while (e.Has()) {
+ // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4
+ if (e.Get().IsProfilerOverheadTime()) {
+ double time = e.Get().GetDouble();
+ if (time >= aSinceTime) {
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime");
+ }
+ double locking = e.Get().GetDouble();
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime,ProfilerOverheadDuration");
+ }
+ double cleaning = e.Get().GetDouble();
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime,ProfilerOverheadDuration*2");
+ }
+ double counter = e.Get().GetDouble();
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime,ProfilerOverheadDuration*3");
+ }
+ double thread = e.Get().GetDouble();
+ // firstTime == 0.0 doubles as the "no sampling recorded yet" marker.
+ if (firstTime == 0.0) {
+ firstTime = time;
+ } else {
+ // Note that we'll have 1 fewer interval than other numbers (because
+ // we need both ends of an interval to know its duration). The final
+ // difference should be insignificant over the expected many
+ // thousands of iterations.
+ intervals.Count(time - lastTime);
+ }
+ lastTime = time;
+ overheads.Count(locking + cleaning + counter + thread);
+ lockings.Count(locking);
+ cleanings.Count(cleaning);
+ counters.Count(counter);
+ threads.Count(thread);
+ // Emit one "data" row for this sampling, using the Schema column indices.
+ AutoArraySchemaWriter writer(aWriter);
+ writer.TimeMsElement(TIME, time);
+ writer.DoubleElement(LOCKING, locking);
+ writer.DoubleElement(MARKER_CLEANING, cleaning);
+ writer.DoubleElement(COUNTERS, counter);
+ writer.DoubleElement(THREADS, thread);
+ }
+ }
+ e.Next();  // Move on; unrelated entries and too-old times end up here too.
+ }
+ aWriter.EndArray(); // data
+ aWriter.EndObject(); // samples
+
+ // Only output statistics if there is at least one full interval (and
+ // therefore at least two samplings.)
+ if (intervals.n > 0) {
+ aWriter.StartObjectProperty("statistics");
+ aWriter.DoubleProperty("profiledDuration", lastTime - firstTime);
+ aWriter.IntProperty("samplingCount", overheads.n);
+ aWriter.DoubleProperty("overheadDurations", overheads.sum);
+ aWriter.DoubleProperty("overheadPercentage",
+ overheads.sum / (lastTime - firstTime));  // NOTE(review): non-finite if lastTime == firstTime.
+#define PROFILER_STATS(name, var) \
+ aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \
+ aWriter.DoubleProperty("min" name, (var).min); \
+ aWriter.DoubleProperty("max" name, (var).max);
+ PROFILER_STATS("Interval", intervals);
+ PROFILER_STATS("Overhead", overheads);
+ PROFILER_STATS("Lockings", lockings);
+ PROFILER_STATS("Cleaning", cleanings);
+ PROFILER_STATS("Counter", counters);
+ PROFILER_STATS("Thread", threads);
+#undef PROFILER_STATS
+ aWriter.EndObject(); // statistics
+ }
+ aWriter.EndObject(); // profilerOverhead
+ });
+}
+
+struct CounterSample {  // One counter reading gathered by StreamCountersToJSON.
+ double mTime;  // Value of the Time entry that follows the CounterId entry.
+ uint64_t mNumber;  // Value of the optional Number entry; 0 when it is absent.
+ int64_t mCount;  // Value of the Count entry.
+};
+
+using CounterSamples = Vector<CounterSample>;  // All readings of one counter.
+
+// Returns a reference to the value stored under `aKey` in `aMap`. When the
+// key is not present yet, a value-initialized entry is inserted first; a
+// failed insertion trips a release assertion.
+template <typename HashM, typename Key>
+static auto& LookupOrAdd(HashM& aMap, Key&& aKey) {
+  auto slot = aMap.lookupForAdd(aKey);
+  if (slot) {
+    // Already present: hand out the existing value.
+    return slot->value();
+  }
+
+  using Value = typename HashM::Entry::ValueType;
+  MOZ_RELEASE_ASSERT(aMap.add(slot, std::forward<Key>(aKey), Value{}));
+  MOZ_ASSERT(!!slot);
+  return slot->value();
+}
+
+// Streams all counters stored in the buffer as a "counters" JSON array, with
+// one object per counter that has at least one sample whose time is at or
+// after `aSinceTime`. Nothing at all is written when the buffer contains no
+// CounterId entry. `aProcessStartTime` is not referenced by this function.
+void ProfileBuffer::StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                                         const TimeStamp& aProcessStartTime,
+                                         double aSinceTime) const {
+  // Because this is a format entirely internal to the Profiler, any parsing
+  // error indicates a bug in the ProfileBuffer writing or the parser itself,
+  // or possibly flaky hardware.
+
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader);
+
+    // Column index of each value in a "data" row.
+    enum Schema : uint32_t { TIME = 0, COUNT = 1, NUMBER = 2 };
+
+    // Stream all counters. We skip other entries, because we process them in
+    // StreamSamplesToJSON()/etc.
+    //
+    // Sequence consumed from the buffer for each sample:
+    // CounterID
+    // Time
+    // Count
+    // Number?
+    //
+    // And the JSON (example; "count" and "number" are written as deltas from
+    // the previously written row):
+    // "counters": [
+    //   {
+    //     "name": "malloc",
+    //     "category": "Memory",
+    //     "description": "Amount of allocated memory",
+    //     "samples": {
+    //       "schema": {"time": 0, "count": 1, "number": 2},
+    //       "data": [
+    //         [
+    //           16117.033968000002,
+    //           2446216,
+    //           6801320
+    //         ],
+    //         [
+    //           16118.037638,
+    //           2446216,
+    //           6801320
+    //         ],
+    //       ],
+    //     },
+    //   },
+    // ]
+
+    // Build the map of counters and populate it
+    HashMap<void*, CounterSamples> counters;
+
+    while (e.Has()) {
+      // skip all non-Counters, including if we start in the middle of a counter
+      if (e.Get().IsCounterId()) {
+        void* id = e.Get().GetPtr();
+        CounterSamples& data = LookupOrAdd(counters, id);
+        e.Next();
+        if (!e.Has() || !e.Get().IsTime()) {
+          ERROR_AND_CONTINUE("expected a Time entry");
+        }
+        double time = e.Get().GetDouble();
+        e.Next();
+        if (time >= aSinceTime) {
+          if (!e.Has() || !e.Get().IsCount()) {
+            ERROR_AND_CONTINUE("expected a Count entry");
+          }
+          // The count is kept as a signed 64-bit value and the number as an
+          // unsigned one (see CounterSample), so read each of them through
+          // the getter of the matching signedness.
+          int64_t count = e.Get().GetInt64();
+          e.Next();
+          uint64_t number;
+          if (!e.Has() || !e.Get().IsNumber()) {
+            // The Number entry is optional.
+            number = 0;
+          } else {
+            number = e.Get().GetUint64();
+            e.Next();
+          }
+          CounterSample sample = {time, number, count};
+          MOZ_RELEASE_ASSERT(data.append(sample));
+        } else {
+          // skip counter sample - only need to skip the initial counter
+          // id, then let the loop at the top skip the rest
+        }
+      } else {
+        e.Next();
+      }
+    }
+    // we have a map of counter entries; dump them to JSON
+    if (counters.count() == 0) {
+      return;
+    }
+
+    aWriter.StartArrayProperty("counters");
+    for (auto iter = counters.iter(); !iter.done(); iter.next()) {
+      CounterSamples& samples = iter.get().value();
+      size_t size = samples.length();
+      if (size == 0) {
+        // Counter seen, but all of its samples were older than aSinceTime.
+        continue;
+      }
+      // The map key is the pointer that was stored in the CounterId entry.
+      const BaseProfilerCount* base_counter =
+          static_cast<const BaseProfilerCount*>(iter.get().key());
+
+      aWriter.Start();
+      aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel));
+      aWriter.StringProperty("category",
+                             MakeStringSpan(base_counter->mCategory));
+      aWriter.StringProperty("description",
+                             MakeStringSpan(base_counter->mDescription));
+
+      // The "number" column is only emitted if some sample carries a non-zero
+      // number.
+      bool hasNumber = false;
+      for (size_t i = 0; i < size; i++) {
+        if (samples[i].mNumber != 0) {
+          hasNumber = true;
+          break;
+        }
+      }
+
+      aWriter.StartObjectProperty("samples");
+      {
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("time");
+        schema.WriteField("count");
+        if (hasNumber) {
+          schema.WriteField("number");
+        }
+      }
+
+      aWriter.StartArrayProperty("data");
+      uint64_t previousNumber = 0;
+      int64_t previousCount = 0;
+      for (size_t i = 0; i < size; i++) {
+        // Encode as deltas, and only encode if different than the last
+        // sample
+        if (i == 0 || samples[i].mNumber != previousNumber ||
+            samples[i].mCount != previousCount) {
+          MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime);
+          MOZ_ASSERT(samples[i].mNumber >= previousNumber);
+          MOZ_ASSERT(samples[i].mNumber - previousNumber <=
+                     uint64_t(std::numeric_limits<int64_t>::max()));
+
+          AutoArraySchemaWriter writer(aWriter);
+          writer.TimeMsElement(TIME, samples[i].mTime);
+          writer.IntElement(COUNT, samples[i].mCount - previousCount);
+          if (hasNumber) {
+            writer.IntElement(NUMBER, static_cast<int64_t>(samples[i].mNumber -
+                                                           previousNumber));
+          }
+          previousNumber = samples[i].mNumber;
+          previousCount = samples[i].mCount;
+        }
+      }
+      aWriter.EndArray();   // data
+      aWriter.EndObject();  // samples
+      aWriter.End();        // for each counter
+    }
+    aWriter.EndArray();  // counters
+  });
+}
+
+#undef ERROR_AND_CONTINUE
+
+// Writes one paused-range object to `aWriter` with the properties
+// "startTime", "endTime" and "reason". A time that is Nothing() is written
+// as a JSON null.
+static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason,
+                           const Maybe<double>& aStartTime,
+                           const Maybe<double>& aEndTime) {
+  // Writes `aTime` under `aName`, or null when the time is not known.
+  const auto writeTimeOrNull = [&aWriter](const auto& aName,
+                                          const Maybe<double>& aTime) {
+    if (aTime.isNothing()) {
+      aWriter.NullProperty(aName);
+      return;
+    }
+    aWriter.TimeDoubleMsProperty(aName, *aTime);
+  };
+
+  aWriter.Start();
+  writeTimeOrNull("startTime", aStartTime);
+  writeTimeOrNull("endTime", aEndTime);
+  aWriter.StringProperty("reason", MakeStringSpan(aReason));
+  aWriter.End();
+}
+
+// Scans the whole buffer for Pause/Resume and CollectionStart/CollectionEnd
+// entries and writes one paused-range object per closed pair. A range that
+// is still open at the end of the buffer is written with a null end time.
+// `aSinceTime` is not referenced by this function.
+void ProfileBuffer::StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+                                             double aSinceTime) const {
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader);
+
+    // Start times of the ranges that are currently open, if any.
+    Maybe<double> pauseStart;
+    Maybe<double> collectionStart;
+
+    for (; e.Has(); e.Next()) {
+      switch (e.Get().GetKind()) {
+        case ProfileBufferEntry::Kind::Pause:
+          pauseStart = Some(e.Get().GetDouble());
+          break;
+        case ProfileBufferEntry::Kind::Resume:
+          AddPausedRange(aWriter, "profiler-paused", pauseStart,
+                         Some(e.Get().GetDouble()));
+          pauseStart = Nothing();
+          break;
+        case ProfileBufferEntry::Kind::CollectionStart:
+          collectionStart = Some(e.Get().GetDouble());
+          break;
+        case ProfileBufferEntry::Kind::CollectionEnd:
+          AddPausedRange(aWriter, "collecting", collectionStart,
+                         Some(e.Get().GetDouble()));
+          collectionStart = Nothing();
+          break;
+        default:
+          // Every other kind of entry is irrelevant here.
+          break;
+      }
+    }
+
+    // Flush the ranges that never saw their closing entry.
+    if (pauseStart.isSome()) {
+      AddPausedRange(aWriter, "profiler-paused", pauseStart, Nothing());
+    }
+    if (collectionStart.isSome()) {
+      AddPausedRange(aWriter, "collecting", collectionStart, Nothing());
+    }
+  });
+}
+
+bool ProfileBuffer::DuplicateLastSample(BaseProfilerThreadId aThreadId,
+ const TimeStamp& aProcessStartTime,
+ Maybe<uint64_t>& aLastSample) {
+ if (!aLastSample) {
+ return false;
+ }
+ /* Overview: re-reads the sample stored at *aLastSample, copies its entries
+  * (with a fresh Time entry) into a scratch buffer, then appends a new
+  * ThreadId entry followed by that copy to this buffer and stores the new
+  * position in aLastSample. Returns false without appending anything when
+  * the old sample is no longer in the buffer or the copy did not fit in the
+  * scratch buffer; aLastSample is reset in both cases. */
+ ProfileChunkedBuffer tempBuffer(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex, WorkerChunkManager());
+ // On every exit path, hand the scratch buffer's chunks back to the manager.
+ auto retrieveWorkerChunk = MakeScopeExit(
+ [&]() { WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); });
+
+ const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader,
+ "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+ "running");
+
+ EntryGetter e(*aReader, *aLastSample);
+
+ if (e.CurPos() != *aLastSample) {
+ // The last sample is no longer within the buffer range, so we cannot
+ // use it. Reset the stored buffer position to Nothing().
+ aLastSample.reset();
+ return false;
+ }
+ // The stored position must point at this thread's own ThreadId entry.
+ MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() &&
+ e.Get().GetThreadId() == aThreadId);
+
+ e.Next();
+
+ // Go through the whole entry and duplicate it, until we find the next
+ // one.
+ while (e.Has()) {
+ switch (e.Get().GetKind()) {
+ case ProfileBufferEntry::Kind::Pause:
+ case ProfileBufferEntry::Kind::Resume:
+ case ProfileBufferEntry::Kind::PauseSampling:
+ case ProfileBufferEntry::Kind::ResumeSampling:
+ case ProfileBufferEntry::Kind::CollectionStart:
+ case ProfileBufferEntry::Kind::CollectionEnd:
+ case ProfileBufferEntry::Kind::ThreadId:
+ // We're done.
+ return true;
+ case ProfileBufferEntry::Kind::Time:
+ // Copy with new time
+ AddEntry(
+ tempBuffer,
+ ProfileBufferEntry::Time(
+ (TimeStamp::Now() - aProcessStartTime).ToMilliseconds()));
+ break;
+ case ProfileBufferEntry::Kind::Number:
+ case ProfileBufferEntry::Kind::Count:
+ case ProfileBufferEntry::Kind::Responsiveness:
+ // Don't copy anything not part of a thread's stack sample
+ break;
+ case ProfileBufferEntry::Kind::CounterId:
+ // CounterId is normally followed by Time - if so, we'd like
+ // to skip it. If we duplicate Time, it won't hurt anything, just
+ // waste buffer space (and this can happen if the CounterId has
+ // fallen off the end of the buffer, but Time (and Number/Count)
+ // are still in the buffer).
+ e.Next();
+ if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) {
+ // this would only happen if there was an invalid sequence
+ // in the buffer. Don't skip it.
+ continue;  // Re-examine the current entry without advancing.
+ }
+ // we've skipped Time
+ break;
+ case ProfileBufferEntry::Kind::ProfilerOverheadTime:
+ // ProfilerOverheadTime is normally followed by
+ // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't
+ // duplicate, as we are in the middle of a sampling and will soon
+ // capture its own overhead.
+ e.Next();
+ // A missing ProfilerOverheadDuration would only happen if there was an
+ // invalid sequence in the buffer. Don't skip the unexpected entry.
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ e.Next();
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ e.Next();
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ e.Next();
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ // we've skipped ProfilerOverheadTime and
+ // ProfilerOverheadDuration*4.
+ break;
+ default: {
+ // Copy anything else we don't know about.
+ AddEntry(tempBuffer, e.Get());
+ break;
+ }
+ }
+ e.Next();
+ }
+ return true;
+ });
+
+ if (!ok) {
+ return false;
+ }
+
+ // If the buffer was big enough, there won't be any cleared blocks.
+ if (tempBuffer.GetState().mClearedBlockCount != 0) {
+ // No need to try to read stack again as it won't fit. Reset the stored
+ // buffer position to Nothing().
+ aLastSample.reset();
+ return false;
+ }
+ // Remember where the duplicate starts, then append the copied entries.
+ aLastSample = Some(AddThreadIdEntry(aThreadId));
+
+ tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader, "tempBuffer cannot be out-of-session");
+
+ EntryGetter e(*aReader);
+
+ while (e.Has()) {
+ AddEntry(e.Get());
+ e.Next();
+ }
+ });
+
+ return true;
+}
+
+// Deliberately a no-op: the duration limit will be removed from Firefox, see
+// bug 1632365. The parameter is only mentioned to mark it as unused.
+void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) {
+  (void)aTime;
+}
+
+// END ProfileBuffer
+////////////////////////////////////////////////////////////////////////
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.h b/mozglue/baseprofiler/core/ProfileBufferEntry.h
new file mode 100644
index 0000000000..ee6e401bd8
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.h
@@ -0,0 +1,364 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntry_h
+#define ProfileBufferEntry_h
+
+#include "BaseProfilingCategory.h"
+#include "gtest/MozGtestFriend.h"
+#include "mozilla/BaseProfileJSONWriter.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferEntryKinds.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class ProfileBufferEntry {  // A Kind tag plus kNumChars bytes of raw storage.
+ public:
+ using KindUnderlyingType =
+ std::underlying_type_t<::mozilla::ProfileBufferEntryKind>;
+ using Kind = ::mozilla::ProfileBufferEntryKind;
+ // Default constructor; its definition is not part of this header.
+ ProfileBufferEntry();
+ // Size of mStorage in bytes.
+ static constexpr size_t kNumChars = ::mozilla::ProfileBufferEntryNumChars;
+
+ private:
+ // aString must be a static string.
+ ProfileBufferEntry(Kind aKind, const char* aString);
+ ProfileBufferEntry(Kind aKind, char aChars[kNumChars]);
+ ProfileBufferEntry(Kind aKind, void* aPtr);
+ ProfileBufferEntry(Kind aKind, double aDouble);
+ ProfileBufferEntry(Kind aKind, int64_t aInt64);
+ ProfileBufferEntry(Kind aKind, uint64_t aUint64);
+ ProfileBufferEntry(Kind aKind, int aInt);
+ ProfileBufferEntry(Kind aKind, BaseProfilerThreadId aThreadId);
+ // Public factories: one static KIND(value) function per entry kind.
+ public:
+#define CTOR(KIND, TYPE, SIZE) \
+ static ProfileBufferEntry KIND(TYPE aVal) { \
+ return ProfileBufferEntry(Kind::KIND, aVal); \
+ }
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR)
+#undef CTOR
+
+ Kind GetKind() const { return mKind; }
+ // One IsKIND() predicate per entry kind.
+#define IS_KIND(KIND, TYPE, SIZE) \
+ bool Is##KIND() const { return mKind == Kind::KIND; }
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND)
+#undef IS_KIND
+
+ private:
+ FRIEND_TEST(ThreadProfile, InsertOneEntry);
+ FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer);
+ FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap);
+ FRIEND_TEST(ThreadProfile, InsertEntriesWrap);
+ FRIEND_TEST(ThreadProfile, MemoryMeasure);
+ friend class ProfileBuffer;
+ // The tag, followed by the raw bytes of the stored value.
+ Kind mKind;
+ uint8_t mStorage[kNumChars];
+ // Typed accessors; private, so only the friends declared above can call them.
+ const char* GetString() const;
+ void* GetPtr() const;
+ double GetDouble() const;
+ int GetInt() const;
+ int64_t GetInt64() const;
+ uint64_t GetUint64() const;
+ BaseProfilerThreadId GetThreadId() const;
+ void CopyCharsInto(char (&aOutArray)[kNumChars]) const;
+};
+
+// Packed layout: 1 byte for the tag + 8 bytes for the value.
+static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size");
+
+// Deduplicates frames and stacks while a thread is being streamed. Frames
+// and stacks are identified by keys; each distinct key is mapped to an index
+// (see mFrameToIndexMap / mStackToIndexMap) and the corresponding table rows
+// are accumulated in dedicated writers that are spliced into the final
+// output by SpliceFrameTableElements() / SpliceStackTableElements().
+class UniqueStacks {
+ public:
+  // Identity of one frame: its location string plus optional metadata.
+  struct FrameKey {
+    // Frame that only has a location; the other fields get neutral values
+    // (not relevant for JS, inner window ID 0, no line/column/category).
+    explicit FrameKey(const char* aLocation)
+        : mData(NormalFrameData{std::string(aLocation), false, 0, Nothing(),
+                                Nothing()}) {}
+
+    // Fully specified frame. `aLocation` is an rvalue reference, but as a
+    // named parameter it is an lvalue inside this constructor, so it must be
+    // moved explicitly; otherwise the string would be copied.
+    FrameKey(std::string&& aLocation, bool aRelevantForJS,
+             uint64_t aInnerWindowID, const Maybe<unsigned>& aLine,
+             const Maybe<unsigned>& aColumn,
+             const Maybe<ProfilingCategoryPair>& aCategoryPair)
+        : mData(NormalFrameData{std::move(aLocation), aRelevantForJS,
+                                aInnerWindowID, aLine, aColumn,
+                                aCategoryPair}) {}
+
+    FrameKey(const FrameKey& aToCopy) = default;
+
+    uint32_t Hash() const;
+    bool operator==(const FrameKey& aOther) const {
+      return mData == aOther.mData;
+    }
+
+    struct NormalFrameData {
+      bool operator==(const NormalFrameData& aOther) const;
+
+      std::string mLocation;
+      bool mRelevantForJS;
+      uint64_t mInnerWindowID;
+      Maybe<unsigned> mLine;
+      Maybe<unsigned> mColumn;
+      Maybe<ProfilingCategoryPair> mCategoryPair;
+    };
+    Variant<NormalFrameData> mData;
+  };
+
+  // Hash policy that lets FrameKey be used as a HashMap key.
+  struct FrameKeyHasher {
+    using Lookup = FrameKey;
+
+    // Combines every field that is present; an empty location and absent
+    // optional fields do not contribute to the hash.
+    static HashNumber hash(const FrameKey& aLookup) {
+      HashNumber hash = 0;
+      if (aLookup.mData.is<FrameKey::NormalFrameData>()) {
+        const FrameKey::NormalFrameData& data =
+            aLookup.mData.as<FrameKey::NormalFrameData>();
+        if (!data.mLocation.empty()) {
+          hash = AddToHash(hash, HashString(data.mLocation.c_str()));
+        }
+        hash = AddToHash(hash, data.mRelevantForJS);
+        hash = mozilla::AddToHash(hash, data.mInnerWindowID);
+        if (data.mLine.isSome()) {
+          hash = AddToHash(hash, *data.mLine);
+        }
+        if (data.mColumn.isSome()) {
+          hash = AddToHash(hash, *data.mColumn);
+        }
+        if (data.mCategoryPair.isSome()) {
+          hash = AddToHash(hash, static_cast<uint32_t>(*data.mCategoryPair));
+        }
+      }
+      return hash;
+    }
+
+    static bool match(const FrameKey& aKey, const FrameKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(FrameKey& aKey, const FrameKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  // Identity of one stack: a frame index plus, for non-root stacks, the index
+  // of the prefix stack it extends. The hash is computed once, at
+  // construction, and is not part of the equality comparison.
+  struct StackKey {
+    Maybe<uint32_t> mPrefixStackIndex;
+    uint32_t mFrameIndex;
+
+    // Root stack made of a single frame.
+    explicit StackKey(uint32_t aFrame)
+        : mFrameIndex(aFrame), mHash(HashGeneric(aFrame)) {}
+
+    // Stack obtained by appending `aFrame` to the stack `aPrefix`, which is
+    // stored at index `aPrefixStackIndex`.
+    StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex,
+             uint32_t aFrame)
+        : mPrefixStackIndex(Some(aPrefixStackIndex)),
+          mFrameIndex(aFrame),
+          mHash(AddToHash(aPrefix.mHash, aFrame)) {}
+
+    HashNumber Hash() const { return mHash; }
+
+    bool operator==(const StackKey& aOther) const {
+      return mPrefixStackIndex == aOther.mPrefixStackIndex &&
+             mFrameIndex == aOther.mFrameIndex;
+    }
+
+   private:
+    HashNumber mHash;
+  };
+
+  // Hash policy that lets StackKey be used as a HashMap key.
+  struct StackKeyHasher {
+    using Lookup = StackKey;
+
+    static HashNumber hash(const StackKey& aLookup) { return aLookup.Hash(); }
+
+    static bool match(const StackKey& aKey, const StackKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(StackKey& aKey, const StackKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  UniqueStacks();
+
+  // Return a StackKey for aFrame as the stack's root frame (no prefix).
+  [[nodiscard]] StackKey BeginStack(const FrameKey& aFrame);
+
+  // Return a new StackKey that is obtained by appending aFrame to aStack.
+  [[nodiscard]] StackKey AppendFrame(const StackKey& aStack,
+                                     const FrameKey& aFrame);
+
+  [[nodiscard]] uint32_t GetOrAddFrameIndex(const FrameKey& aFrame);
+  [[nodiscard]] uint32_t GetOrAddStackIndex(const StackKey& aStack);
+
+  void SpliceFrameTableElements(SpliceableJSONWriter& aWriter);
+  void SpliceStackTableElements(SpliceableJSONWriter& aWriter);
+
+  // Access to the string table; release-asserts that it exists.
+  UniqueJSONStrings& UniqueStrings() {
+    MOZ_RELEASE_ASSERT(mUniqueStrings.get());
+    return *mUniqueStrings;
+  }
+
+ private:
+  void StreamNonJITFrame(const FrameKey& aFrame);
+  void StreamStack(const StackKey& aStack);
+
+  UniquePtr<UniqueJSONStrings> mUniqueStrings;
+
+  SpliceableChunkedJSONWriter mFrameTableWriter;
+  HashMap<FrameKey, uint32_t, FrameKeyHasher> mFrameToIndexMap;
+
+  SpliceableChunkedJSONWriter mStackTableWriter;
+  HashMap<StackKey, uint32_t, StackKeyHasher> mStackToIndexMap;
+};
+
+//
+// Thread profile JSON Format
+// --------------------------
+//
+// The profile contains much duplicate information. The output JSON of the
+// profile attempts to deduplicate strings, frames, and stack prefixes, to cut
+// down on size and to increase JSON streaming speed. Deduplicated values are
+// streamed as indices into their respective tables.
+//
+// Further, arrays of objects with the same set of properties (e.g., samples,
+// frames) are output as arrays according to a schema instead of an object
+// with property names. A property that is not present is represented in the
+// array as null or undefined.
+//
+// The format of the thread profile JSON is shown by the following example
+// with 1 sample and 1 marker:
+//
+// {
+// "name": "Foo",
+// "tid": 42,
+// "samples":
+// {
+// "schema":
+// {
+// "stack": 0, /* index into stackTable */
+// "time": 1, /* number */
+// "eventDelay": 2, /* number */
+// },
+// "data":
+// [
+// [ 1, 0.0, 0.0 ] /* { stack: 1, time: 0.0, eventDelay: 0.0 } */
+// ]
+// },
+//
+// "markers":
+// {
+// "schema":
+// {
+// "name": 0, /* index into stringTable */
+// "time": 1, /* number */
+// "data": 2 /* arbitrary JSON */
+// },
+// "data":
+// [
+// [ 3, 0.1 ] /* { name: 'example marker', time: 0.1 } */
+// ]
+// },
+//
+// "stackTable":
+// {
+// "schema":
+// {
+// "prefix": 0, /* index into stackTable */
+// "frame": 1 /* index into frameTable */
+// },
+// "data":
+// [
+// [ null, 0 ], /* (root) */
+// [ 0, 1 ] /* (root) > foo.js */
+// ]
+// },
+//
+// "frameTable":
+// {
+// "schema":
+// {
+// "location": 0, /* index into stringTable */
+// "relevantForJS": 1, /* bool */
+// "innerWindowID": 2, /* inner window ID of global JS `window` object */
+// "implementation": 3, /* index into stringTable */
+// "line": 4, /* number */
+// "column": 5, /* number */
+// "category": 6, /* index into profile.meta.categories */
+// "subcategory": 7 /* index into
+// profile.meta.categories[category].subcategories */
+// },
+// "data":
+// [
+// [ 0 ], /* { location: '(root)' } */
+// [ 1, 2 ] /* { location: 'foo.js',
+// implementation: 'baseline' } */
+// ]
+// },
+//
+// "stringTable":
+// [
+// "(root)",
+// "foo.js",
+// "baseline",
+// "example marker"
+// ]
+// }
+//
+// Process:
+// {
+// "name": "Bar",
+// "pid": 24,
+// "threads":
+// [
+// <0-N threads from above>
+// ],
+// "counters": /* includes the memory counter */
+// [
+// {
+// "name": "qwerty",
+// "category": "uiop",
+// "description": "this is qwerty uiop",
+// "sample_groups":
+// [
+// {
+// "id": 42, /* number (thread id, or object identifier (tab), etc) */
+// "samples":
+// {
+// "schema":
+// {
+// "time": 1, /* number */
+// "number": 2, /* number (of times the counter was touched) */
+// "count": 3 /* number (total for the counter) */
+// },
+// "data":
+// [
+// [ 0.1, 1824,
+// 454622 ] /* { time: 0.1, number: 1824, count: 454622 } */
+// ]
+// },
+// },
+// /* more sample-group objects with different id's */
+// ]
+// },
+// /* more counters */
+// ],
+// }
+//
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif /* ndef ProfileBufferEntry_h */
diff --git a/mozglue/baseprofiler/core/ProfileJSONWriter.cpp b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
new file mode 100644
index 0000000000..dc51f6958e
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
@@ -0,0 +1,101 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla::baseprofiler {
+
+// Creates an empty string table: opens the bare list on the string-table
+// writer and, should the writer report a failure, records it and clears the
+// hash-to-index map.
+UniqueJSONStrings::UniqueJSONStrings(FailureLatch& aFailureLatch)
+    : mStringTableWriter(aFailureLatch) {
+  mStringTableWriter.StartBareList();
+
+  const char* const startFailure = mStringTableWriter.GetFailure();
+  if (startFailure != nullptr) {
+    ClearAndSetFailure(startFailure);
+  }
+}
+
+// Creates a string table holding a copy of `aOther`'s hash-to-index map and
+// of its already-written string elements, reporting progress through
+// `aProgressLogger`.
+//
+// Nothing is copied when the new writer has already failed, or when `aOther`
+// is in a failure state (that failure is then taken over by this object).
+UniqueJSONStrings::UniqueJSONStrings(FailureLatch& aFailureLatch,
+                                     const UniqueJSONStrings& aOther,
+                                     ProgressLogger aProgressLogger)
+    : mStringTableWriter(aFailureLatch) {
+  using namespace mozilla::literals::ProportionValue_literals;  // For `10_pc`.
+
+  if (mStringTableWriter.Failed()) {
+    return;
+  }
+
+  if (const char* failure = aOther.GetFailure(); failure) {
+    ClearAndSetFailure(failure);
+    return;
+  }
+
+  mStringTableWriter.StartBareList();
+  uint32_t count = aOther.mStringHashToIndexMap.count();
+  if (count != 0) {
+    // Reserve up front so that the insertions below cannot fail.
+    if (!mStringHashToIndexMap.reserve(count)) {
+      ClearAndSetFailure("Cannot reserve UniqueJSONStrings map storage");
+      return;
+    }
+    auto iter = aOther.mStringHashToIndexMap.iter();
+    for (auto&& [unusedIndex, progressLogger] :
+         aProgressLogger.CreateLoopSubLoggersFromTo(
+             10_pc, 90_pc, count, "Copying unique strings...")) {
+      (void)unusedIndex;
+      if (iter.done()) {
+        break;
+      }
+      mStringHashToIndexMap.putNewInfallible(iter.get().key(),
+                                             iter.get().value());
+      iter.next();
+    }
+    aProgressLogger.SetLocalProgress(90_pc, "Copied unique strings");
+    mStringTableWriter.CopyAndSplice(
+        aOther.mStringTableWriter.ChunkedWriteFunc());
+    // The splice writes into our own writer, so that is where a new failure
+    // would show up. This matches the checks done by the other constructor
+    // and by GetOrAddIndex(). `aOther` is still consulted, as before.
+    const char* failure = mStringTableWriter.GetFailure();
+    if (!failure) {
+      failure = aOther.GetFailure();
+    }
+    if (failure) {
+      ClearAndSetFailure(failure);
+    }
+    aProgressLogger.SetLocalProgress(100_pc, "Spliced unique strings");
+  }
+}
+
+UniqueJSONStrings::~UniqueJSONStrings() = default;  // Defaulted out of line.
+
+// Takes the accumulated string-table output away from this object's writer
+// and splices it into `aWriter`.
+void UniqueJSONStrings::SpliceStringTableElements(
+    SpliceableJSONWriter& aWriter) {
+  auto stringTableChunks = mStringTableWriter.TakeChunkedWriteFunc();
+  aWriter.TakeAndSplice(std::move(stringTableChunks));
+}
+
+void UniqueJSONStrings::ClearAndSetFailure(std::string aFailure) {
+ mStringTableWriter.SetFailure(std::move(aFailure));  // Record the failure on the writer.
+ mStringHashToIndexMap.clear();  // Then drop every known string index.
+}
+
+// Returns the string-table index associated with `aStr`, appending the string
+// to the table first if its hash has not been seen before. Strings are
+// identified by their hash only. Returns Nothing when this object is already
+// in a failure state, or when adding the string fails.
+Maybe<uint32_t> UniqueJSONStrings::GetOrAddIndex(const Span<const char>& aStr) {
+  if (Failed()) {
+    return Nothing{};
+  }
+
+  const HashNumber strHash = HashString(aStr.data(), aStr.size());
+  // A new string receives the next free index, i.e. the current entry count.
+  const uint32_t nextIndex = mStringHashToIndexMap.count();
+
+  auto slot = mStringHashToIndexMap.lookupForAdd(strHash);
+  if (!slot) {
+    if (!mStringHashToIndexMap.add(slot, strHash, nextIndex)) {
+      ClearAndSetFailure(
+          "OOM in UniqueJSONStrings::GetOrAddIndex adding a map");
+      return Nothing{};
+    }
+    mStringTableWriter.StringElement(aStr);
+    const char* const writeFailure = mStringTableWriter.GetFailure();
+    if (writeFailure != nullptr) {
+      ClearAndSetFailure(writeFailure);
+      return Nothing{};
+    }
+    return Some(nextIndex);
+  }
+
+  MOZ_ASSERT(slot->value() < nextIndex);
+  return Some(slot->value());
+}
+
+} // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.cpp b/mozglue/baseprofiler/core/ProfiledThreadData.cpp
new file mode 100644
index 0000000000..62cb994ae2
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.cpp
@@ -0,0 +1,191 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfiledThreadData.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+#if defined(GP_OS_darwin)
+# include <pthread.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Stores aThreadInfo in mThreadInfo; all other members are left
+// default-constructed.
+ProfiledThreadData::ProfiledThreadData(ThreadInfo* aThreadInfo)
+ : mThreadInfo(aThreadInfo) {}
+
+// Nothing to do beyond destroying the members.
+ProfiledThreadData::~ProfiledThreadData() = default;
+
+// Streams this thread's data into aWriter, between aWriter.Start() and
+// aWriter.End(): first the properties written by StreamSamplesAndMarkers(),
+// then the "stackTable", "frameTable" and "stringTable" properties.
+// The tables are collected in a function-local UniqueStacks, whose
+// unique-strings object is attached to aWriter for the duration of the call
+// and detached again before returning.
+void ProfiledThreadData::StreamJSON(const ProfileBuffer& aBuffer,
+ SpliceableJSONWriter& aWriter,
+ const std::string& aProcessName,
+ const std::string& aETLDplus1,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime) {
+ UniqueStacks uniqueStacks;
+
+ aWriter.SetUniqueStrings(uniqueStacks.UniqueStrings());
+
+ aWriter.Start();
+ {
+ StreamSamplesAndMarkers(mThreadInfo->Name(), mThreadInfo->ThreadId(),
+ aBuffer, aWriter, aProcessName, aETLDplus1,
+ aProcessStartTime, mThreadInfo->RegisterTime(),
+ mUnregisterTime, aSinceTime, uniqueStacks);
+
+ // Stack table: schema first, then the data collected in uniqueStacks.
+ aWriter.StartObjectProperty("stackTable");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("prefix");
+ schema.WriteField("frame");
+ }
+
+ aWriter.StartArrayProperty("data");
+ { uniqueStacks.SpliceStackTableElements(aWriter); }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ // Frame table: schema first, then the data collected in uniqueStacks.
+ aWriter.StartObjectProperty("frameTable");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("location");
+ schema.WriteField("relevantForJS");
+ schema.WriteField("innerWindowID");
+ schema.WriteField("implementation");
+ schema.WriteField("line");
+ schema.WriteField("column");
+ schema.WriteField("category");
+ schema.WriteField("subcategory");
+ }
+
+ aWriter.StartArrayProperty("data");
+ { uniqueStacks.SpliceFrameTableElements(aWriter); }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ // The unique strings are moved out of uniqueStacks here, so this is the
+ // last table to be written.
+ aWriter.StartArrayProperty("stringTable");
+ {
+ std::move(uniqueStacks.UniqueStrings())
+ .SpliceStringTableElements(aWriter);
+ }
+ aWriter.EndArray();
+ }
+ aWriter.End();
+
+ // Detach the (now consumed) unique strings from the writer.
+ aWriter.ResetUniqueStrings();
+}
+
+// Writes the per-thread properties into aWriter, in this order:
+// "processType", "name", optional "processName" and "eTLD+1", "registerTime",
+// "unregisterTime", "samples", "markers", "pid" and "tid".
+// Samples and markers are read from aBuffer for aThreadId; stacks are
+// de-duplicated through aUniqueStacks.
+// Returns the thread id reported by ProfileBuffer::StreamSamplesToJSON(); that
+// id is also the one written as "tid" when aThreadId is not specified.
+BaseProfilerThreadId StreamSamplesAndMarkers(
+ const char* aName, BaseProfilerThreadId aThreadId,
+ const ProfileBuffer& aBuffer, SpliceableJSONWriter& aWriter,
+ const std::string& aProcessName, const std::string& aETLDplus1,
+ const TimeStamp& aProcessStartTime, const TimeStamp& aRegisterTime,
+ const TimeStamp& aUnregisterTime, double aSinceTime,
+ UniqueStacks& aUniqueStacks) {
+ BaseProfilerThreadId processedThreadId;
+
+ aWriter.StringProperty(
+ "processType",
+ "(unknown)" /* XRE_GeckoProcessTypeToString(XRE_GetProcessType()) */);
+
+ {
+ std::string name = aName;
+ // We currently need to distinguish threads output by Base Profiler from
+ // those in Gecko Profiler, as the frontend could get confused and lose
+ // tracks with the same name.
+ // TODO: As part of the profilers de-duplication, thread data from both
+ // profilers should end up in the same track, at which point this won't be
+ // necessary anymore. See meta bug 1557566.
+ name += " (pre-xul)";
+ aWriter.StringProperty("name", name);
+ }
+
+ // Use given process name (if any).
+ if (!aProcessName.empty()) {
+ aWriter.StringProperty("processName", aProcessName);
+ }
+ if (!aETLDplus1.empty()) {
+ aWriter.StringProperty("eTLD+1", aETLDplus1);
+ }
+
+ // Times are written in milliseconds relative to aProcessStartTime, or as
+ // null when the corresponding timestamp is not set.
+ if (aRegisterTime) {
+ aWriter.DoubleProperty(
+ "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds());
+ } else {
+ aWriter.NullProperty("registerTime");
+ }
+
+ if (aUnregisterTime) {
+ aWriter.DoubleProperty(
+ "unregisterTime",
+ (aUnregisterTime - aProcessStartTime).ToMilliseconds());
+ } else {
+ aWriter.NullProperty("unregisterTime");
+ }
+
+ // Samples: schema first, then the data streamed from aBuffer.
+ aWriter.StartObjectProperty("samples");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("stack");
+ schema.WriteField("time");
+ schema.WriteField("eventDelay");
+ }
+
+ aWriter.StartArrayProperty("data");
+ {
+ processedThreadId = aBuffer.StreamSamplesToJSON(
+ aWriter, aThreadId, aSinceTime, aUniqueStacks);
+ }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ // Markers: schema first, then the data streamed from aBuffer.
+ aWriter.StartObjectProperty("markers");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("name");
+ schema.WriteField("startTime");
+ schema.WriteField("endTime");
+ schema.WriteField("phase");
+ schema.WriteField("category");
+ schema.WriteField("data");
+ }
+
+ aWriter.StartArrayProperty("data");
+ {
+ aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime,
+ aSinceTime, aUniqueStacks);
+ }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ // Tech note: If `ToNumber()` returns a uint64_t, the conversion to int64_t is
+ // "implementation-defined" before C++20. This is acceptable here, because
+ // this is a one-way conversion to a unique identifier that's used to visually
+ // separate data by thread on the front-end.
+ aWriter.IntProperty(
+ "pid", static_cast<int64_t>(profiler_current_process_id().ToNumber()));
+ // Prefer the requested thread id; fall back to the id found while streaming
+ // the samples when no specific thread was requested.
+ aWriter.IntProperty("tid",
+ static_cast<int64_t>(aThreadId.IsSpecified()
+ ? aThreadId.ToNumber()
+ : processedThreadId.ToNumber()));
+
+ return processedThreadId;
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.h b/mozglue/baseprofiler/core/ProfiledThreadData.h
new file mode 100644
index 0000000000..0590b03326
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.h
@@ -0,0 +1,120 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfiledThreadData_h
+#define ProfiledThreadData_h
+
+#include "BaseProfilingStack.h"
+#include "platform.h"
+#include "ProfileBufferEntry.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class ProfileBuffer;
+
+// This class contains information about a thread that is only relevant while
+// the profiler is running, for any threads (both alive and dead) whose thread
+// name matches the "thread filter" in the current profiler run.
+// ProfiledThreadData objects may be kept alive even after the thread is
+// unregistered, as long as there is still data for that thread in the profiler
+// buffer.
+//
+// Accesses to this class are protected by the profiler state lock.
+//
+// Created as soon as the following are true for the thread:
+// - The profiler is running, and
+// - the thread matches the profiler's thread filter, and
+// - the thread is registered with the profiler.
+// So it gets created in response to either (1) the profiler being started (for
+// an existing registered thread) or (2) the thread being registered (if the
+// profiler is already running).
+//
+// The thread may be unregistered during the lifetime of ProfiledThreadData.
+// If that happens, NotifyUnregistered() is called.
+//
+// This class is the right place to store buffer positions. Profiler buffer
+// positions become invalid if the profiler buffer is destroyed, which happens
+// when the profiler is stopped.
+class ProfiledThreadData final {
+ public:
+ explicit ProfiledThreadData(ThreadInfo* aThreadInfo);
+ ~ProfiledThreadData();
+
+ // Called when the thread gets unregistered: forgets the last sample
+ // position, and records the unregistration time (now) and the given buffer
+ // position.
+ void NotifyUnregistered(uint64_t aBufferPosition) {
+ mLastSample = Nothing();
+ MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext,
+ "JSContext should have been cleared before the thread was "
+ "unregistered");
+ mUnregisterTime = TimeStamp::Now();
+ mBufferPositionWhenUnregistered = Some(aBufferPosition);
+ }
+ // Buffer position recorded by NotifyUnregistered(), or Nothing() if
+ // NotifyUnregistered() has not been called.
+ Maybe<uint64_t> BufferPositionWhenUnregistered() {
+ return mBufferPositionWhenUnregistered;
+ }
+
+ // Mutable access to the position of the most recent sample (see
+ // mLastSample below).
+ Maybe<uint64_t>& LastSample() { return mLastSample; }
+
+ // Streams this thread's samples, markers and tables from aBuffer into
+ // aWriter.
+ void StreamJSON(const ProfileBuffer& aBuffer, SpliceableJSONWriter& aWriter,
+ const std::string& aProcessName,
+ const std::string& aETLDplus1,
+ const TimeStamp& aProcessStartTime, double aSinceTime);
+
+ // The ThreadInfo given at construction.
+ const RefPtr<ThreadInfo> Info() const { return mThreadInfo; }
+
+ // Records the buffer position at which the thread received a JSContext.
+ void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) {
+ mBufferPositionWhenReceivedJSContext = Some(aCurrentBufferPosition);
+ }
+
+ private:
+ // Group A:
+ // The following fields are interesting for the entire lifetime of a
+ // ProfiledThreadData object.
+
+ // This thread's thread info.
+ const RefPtr<ThreadInfo> mThreadInfo;
+
+ // Group B:
+ // The following fields are only used while this thread is alive and
+ // registered. They become Nothing() once the thread is unregistered.
+
+ // When sampling, this holds the position in ActivePS::mBuffer of the most
+ // recent sample for this thread, or Nothing() if there is no sample for this
+ // thread in the buffer.
+ Maybe<uint64_t> mLastSample;
+
+ // Only non-Nothing() if the thread currently has a JSContext.
+ Maybe<uint64_t> mBufferPositionWhenReceivedJSContext;
+
+ // Group C:
+ // The following fields are only used once this thread has been unregistered.
+
+ // Both are set by NotifyUnregistered().
+ Maybe<uint64_t> mBufferPositionWhenUnregistered;
+ TimeStamp mUnregisterTime;
+};
+
+// Stream all samples and markers from aBuffer with the given aThreadId.
+// When aThreadId is not specified (`aThreadId.IsSpecified()` is false), the
+// thread id found while streaming the samples is written as "tid" instead.
+// NOTE(review): the previous wording was "0 for everything, which is assumed
+// to be a single backtrace sample"; presumably an unspecified id now plays the
+// role of 0 -- confirm against ProfileBuffer::StreamSamplesToJSON.
+// Returns the thread id of the output sample(s), as reported by
+// ProfileBuffer::StreamSamplesToJSON.
+BaseProfilerThreadId StreamSamplesAndMarkers(
+ const char* aName, BaseProfilerThreadId aThreadId,
+ const ProfileBuffer& aBuffer, SpliceableJSONWriter& aWriter,
+ const std::string& aProcessName, const std::string& aETLDplus1,
+ const TimeStamp& aProcessStartTime, const TimeStamp& aRegisterTime,
+ const TimeStamp& aUnregisterTime, double aSinceTime,
+ UniqueStacks& aUniqueStacks);
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // ProfiledThreadData_h
diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.cpp b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp
new file mode 100644
index 0000000000..8343888351
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp
@@ -0,0 +1,125 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerBacktrace.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Owning constructor: takes ownership of the given buffer(s), and points the
+// raw mProfileChunkedBuffer/mProfileBuffer members at the owned storage.
+// If a ProfileBuffer is given, the given ProfileChunkedBuffer must be non-null
+// and must be that ProfileBuffer's underlying chunked buffer (release-asserted).
+ProfilerBacktrace::ProfilerBacktrace(
+ const char* aName,
+ UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage,
+ UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull /* = nullptr */)
+ : mName(aName),
+ mOptionalProfileChunkedBufferStorage(
+ std::move(aProfileChunkedBufferStorage)),
+ mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()),
+ mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)),
+ mProfileBuffer(mOptionalProfileBufferStorage.get()) {
+ if (mProfileBuffer) {
+ MOZ_RELEASE_ASSERT(mProfileChunkedBuffer,
+ "If we take ownership of a ProfileBuffer, we must also "
+ "receive ownership of a ProfileChunkedBuffer");
+ MOZ_RELEASE_ASSERT(
+ mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+ "If we take ownership of a ProfileBuffer, we must also receive "
+ "ownership of its ProfileChunkedBuffer");
+ }
+ // Debug-only check; a null chunked buffer is accepted.
+ MOZ_ASSERT(
+ !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(),
+ "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer");
+}
+
+// Non-owning constructor: only keeps pointers to external buffers (either or
+// both may be null).
+// - If only a ProfileBuffer is given, its underlying chunked buffer is used.
+// - If both are given, they must already be connected (release-asserted).
+// In debug builds, the resulting chunked buffer (if any) is checked to be
+// non-thread-safe.
+ProfilerBacktrace::ProfilerBacktrace(
+    const char* aName,
+    ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull /* = nullptr */,
+    ProfileBuffer* aExternalProfileBufferOrNull /* = nullptr */)
+    : mName(aName),
+      mProfileChunkedBuffer(aExternalProfileChunkedBufferOrNull),
+      mProfileBuffer(aExternalProfileBufferOrNull) {
+  if (mProfileBuffer) {
+    if (mProfileChunkedBuffer) {
+      MOZ_RELEASE_ASSERT(
+          mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+          "If we reference both ProfileChunkedBuffer and ProfileBuffer, they "
+          "must already be connected");
+    } else {
+      // We don't have a ProfileChunkedBuffer but we have a ProfileBuffer, use
+      // the latter's ProfileChunkedBuffer.
+      mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer();
+    }
+  }
+
+  if (mProfileChunkedBuffer) {
+    MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(),
+               "ProfilerBacktrace only takes a non-thread-safe "
+               "ProfileChunkedBuffer");
+  }
+}
+
+// Owned buffers (if any) are released by the UniquePtr members.
+ProfilerBacktrace::~ProfilerBacktrace() = default;
+
+// Streams this backtrace's samples and markers into aWriter, de-duplicating
+// stacks through aUniqueStacks. Returns the thread id reported by
+// StreamSamplesAndMarkers(), or a default-constructed id if there is no
+// buffer to stream from.
+BaseProfilerThreadId ProfilerBacktrace::StreamJSON(
+    SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime,
+    UniqueStacks& aUniqueStacks) {
+  // Unlike ProfiledThreadData::StreamJSON, we don't need to call
+  // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain
+  // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded
+  // at sample time.
+  auto streamFrom = [&](const ProfileBuffer& aSourceBuffer) {
+    return StreamSamplesAndMarkers(
+        mName.c_str(), BaseProfilerThreadId{}, aSourceBuffer, aWriter, "", "",
+        aProcessStartTime,
+        /* aRegisterTime */ TimeStamp(),
+        /* aUnregisterTime */ TimeStamp(),
+        /* aSinceTime */ 0, aUniqueStacks);
+  };
+
+  // Prefer an existing ProfileBuffer.
+  if (mProfileBuffer) {
+    return streamFrom(*mProfileBuffer);
+  }
+
+  // Otherwise wrap the chunked buffer in a temporary ProfileBuffer.
+  if (mProfileChunkedBuffer) {
+    ProfileBuffer temporaryBuffer(*mProfileChunkedBuffer);
+    return streamFrom(temporaryBuffer);
+  }
+
+  // If there are no buffers, the backtrace is empty and nothing is streamed.
+  BaseProfilerThreadId noThreadId;
+  return noThreadId;
+}
+
+} // namespace baseprofiler
+
+// static
+// Reads back a backtrace written by the matching Serializer: first the
+// chunked buffer, then (only if a buffer was present) the name.
+// Returns nullptr when no buffer could be read.
+template <typename Destructor>
+UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>
+ProfileBufferEntryReader::
+    Deserializer<UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>>::Read(
+        ProfileBufferEntryReader& aER) {
+  using BacktracePtr = UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>;
+
+  auto chunkedBuffer = aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>();
+  if (chunkedBuffer) {
+    MOZ_ASSERT(
+        !chunkedBuffer->IsThreadSafe(),
+        "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers");
+    // The name follows the buffer in the serialized entry.
+    std::string backtraceName = aER.ReadObject<std::string>();
+    return BacktracePtr{new baseprofiler::ProfilerBacktrace(
+        backtraceName.c_str(), std::move(chunkedBuffer))};
+  }
+
+  return nullptr;
+}
+
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.h b/mozglue/baseprofiler/core/ProfilerBacktrace.h
new file mode 100644
index 0000000000..0b5b69e738
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerBacktrace.h
@@ -0,0 +1,162 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __PROFILER_BACKTRACE_H
+#define __PROFILER_BACKTRACE_H
+
+#include "mozilla/ProfileChunkedBuffer.h"
+#include "mozilla/UniquePtr.h"
+
+#include <string>
+
+namespace mozilla {
+
+class TimeStamp;
+
+namespace baseprofiler {
+
+class ProfileBuffer;
+class SpliceableJSONWriter;
+class ThreadInfo;
+class UniqueStacks;
+
+// ProfilerBacktrace encapsulates a synchronous sample.
+// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they
+// must already be linked together). The ProfileChunkedBuffer contains all the
+// data; the ProfileBuffer is not strictly needed, only provide it if it is
+// already available at the call site.
+// And these buffers can either be:
+// - owned here (so that the ProfilerBacktrace object can be kept for later
+// use), OR
+// - referenced through pointers (in cases where the backtrace is immediately
+// streamed out, so we only need temporary references to external buffers);
+// these pointers may be null for empty backtraces.
+class ProfilerBacktrace {
+ public:
+ // Take ownership of external buffers and use them to keep, and to stream a
+ // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must
+ // be provided as well.
+ explicit ProfilerBacktrace(
+ const char* aName,
+ UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage,
+ UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull = nullptr);
+
+ // Take pointers to external buffers and use them to stream a backtrace.
+ // If null, the backtrace is effectively empty.
+ // If both are provided, they must already be connected.
+ explicit ProfilerBacktrace(
+ const char* aName,
+ ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull = nullptr,
+ ProfileBuffer* aExternalProfileBufferOrNull = nullptr);
+
+ ~ProfilerBacktrace();
+
+ // True when there is no chunked buffer, or when the serialized size of the
+ // chunked buffer is no larger than `ULEB128Size(0u)`.
+ [[nodiscard]] bool IsEmpty() const {
+ return !mProfileChunkedBuffer ||
+ ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>::Bytes(
+ *mProfileChunkedBuffer) <= ULEB128Size(0u);
+ }
+
+ // ProfilerBacktraces' stacks are deduplicated in the context of the
+ // profile that contains the backtrace as a marker payload.
+ //
+ // That is, markers that contain backtraces should not need their own stack,
+ // frame, and string tables. They should instead reuse their parent
+ // profile's tables.
+ BaseProfilerThreadId StreamJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ UniqueStacks& aUniqueStacks);
+
+ private:
+ // Used to de/serialize a ProfilerBacktrace.
+ friend ProfileBufferEntryWriter::Serializer<ProfilerBacktrace>;
+ friend ProfileBufferEntryReader::Deserializer<ProfilerBacktrace>;
+
+ // Name given at construction; it is serialized after the buffer.
+ std::string mName;
+
+ // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be
+ // located before `mProfileBuffer` so that it's destroyed after.
+ UniquePtr<ProfileChunkedBuffer> mOptionalProfileChunkedBufferStorage;
+ // If null, there is no need to check mProfileBuffer's (if present) underlying
+ // buffer because this is done when constructed.
+ ProfileChunkedBuffer* mProfileChunkedBuffer;
+
+ // Optional owned ProfileBuffer, and the pointer actually used for streaming
+ // (which points either at the owned storage or at an external buffer, or is
+ // null).
+ UniquePtr<ProfileBuffer> mOptionalProfileBufferStorage;
+ ProfileBuffer* mProfileBuffer;
+};
+
+} // namespace baseprofiler
+
+// Format: [ ProfileChunkedBuffer (read back as UniquePtr<ProfileChunkedBuffer>)
+//           | name ]
+// Initial len==0 marks a nullptr or empty backtrace.
+template <>
+struct ProfileBufferEntryWriter::Serializer<baseprofiler::ProfilerBacktrace> {
+  // Number of bytes that Write() will output for aBacktrace: the size of a
+  // single ULEB128 zero when there is no buffer or the buffer is empty,
+  // otherwise the size of the buffer followed by the name.
+  static Length Bytes(const baseprofiler::ProfilerBacktrace& aBacktrace) {
+    if (ProfileChunkedBuffer* const chunkedBuffer =
+            aBacktrace.mProfileChunkedBuffer) {
+      auto chunkedBufferBytes = SumBytes(*chunkedBuffer);
+      if (chunkedBufferBytes > ULEB128Size(0u)) {
+        return chunkedBufferBytes + SumBytes(aBacktrace.mName);
+      }
+      // Empty buffer, fall through.
+    }
+    // No buffer, or empty buffer.
+    return ULEB128Size(0u);
+  }
+
+  // Writes aBacktrace: a single ULEB128 zero when there is no buffer or the
+  // buffer is empty, otherwise the buffer followed by the name.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const baseprofiler::ProfilerBacktrace& aBacktrace) {
+    ProfileChunkedBuffer* const chunkedBuffer =
+        aBacktrace.mProfileChunkedBuffer;
+    const bool hasContent =
+        chunkedBuffer && SumBytes(*chunkedBuffer) > ULEB128Size(0u);
+    if (hasContent) {
+      aEW.WriteObject(*chunkedBuffer);
+      aEW.WriteObject(aBacktrace.mName);
+      return;
+    }
+    // No buffer, or empty buffer.
+    aEW.WriteULEB128(0u);
+  }
+};
+
+// Serialization of a (possibly null) pointer to a ProfilerBacktrace.
+// A null pointer is serialized exactly like an empty backtrace.
+template <typename Destructor>
+struct ProfileBufferEntryWriter::Serializer<
+    UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> {
+  static Length Bytes(const UniquePtr<baseprofiler::ProfilerBacktrace,
+                                      Destructor>& aBacktrace) {
+    if (aBacktrace) {
+      return SumBytes(*aBacktrace);
+    }
+    // Null backtrace pointer (treated like an empty backtrace).
+    return ULEB128Size(0u);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniquePtr<baseprofiler::ProfilerBacktrace,
+                                    Destructor>& aBacktrace) {
+    if (aBacktrace) {
+      aEW.WriteObject(*aBacktrace);
+      return;
+    }
+    // Null backtrace pointer (treated like an empty backtrace).
+    aEW.WriteULEB128(0u);
+  }
+};
+
+// Deserialization of a pointer to a ProfilerBacktrace.
+template <typename Destructor>
+struct ProfileBufferEntryReader::Deserializer<
+ UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> {
+ // Replaces aBacktrace with the result of Read().
+ static void ReadInto(
+ ProfileBufferEntryReader& aER,
+ UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>& aBacktrace) {
+ aBacktrace = Read(aER);
+ }
+
+ // Only declared here; the definition is not in this header.
+ static UniquePtr<baseprofiler::ProfilerBacktrace, Destructor> Read(
+ ProfileBufferEntryReader& aER);
+};
+
+} // namespace mozilla
+
+#endif // __PROFILER_BACKTRACE_H
diff --git a/mozglue/baseprofiler/core/ProfilerMarkers.cpp b/mozglue/baseprofiler/core/ProfilerMarkers.cpp
new file mode 100644
index 0000000000..2a6115c166
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerMarkers.cpp
@@ -0,0 +1,415 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseProfilerMarkers.h"
+
+#include "mozilla/BaseProfilerUtils.h"
+
+#include <limits>
+
+namespace mozilla {
+namespace base_profiler_markers_detail {
+
+// We need an atomic type that can hold a `DeserializerTag`. (Atomic doesn't
+// work with too-small types.)
+using DeserializerTagAtomic = unsigned;
+
+// The atomic sDeserializerCount also includes bits that act as a "RWLock".
+// scExclusiveLock is the exclusive-lock bit: whoever can set this bit gets
+// exclusive access to the count and the whole sMarkerTypeFunctions1Based
+// array, guaranteeing that it cannot be modified.
+static constexpr DeserializerTagAtomic scExclusiveLock = 0x80'00'00'00u;
+// Code that wants shared access can add this value, then ensure there is no
+// exclusive lock, after which it's guaranteed that no exclusive lock can be
+// taken until the shared lock count goes back to zero.
+static constexpr DeserializerTagAtomic scSharedLockUnit = 0x00'01'00'00u;
+// This mask isolates the actual count value from the lock bits.
+static constexpr DeserializerTagAtomic scTagMask = 0x00'00'FF'FFu;
+
+// Number of currently-registered deserializers and other marker type functions.
+// The high bits contain lock bits, see above.
+static Atomic<DeserializerTagAtomic, MemoryOrdering::ReleaseAcquire>
+ sDeserializerCount{0};
+
+// This needs to be big enough to handle all possible marker types. If one day
+// this needs to be higher, the underlying DeserializerTag type will have to be
+// changed.
+static constexpr DeserializerTagAtomic DeserializerMax = 250;
+static_assert(DeserializerMax <= scTagMask,
+ "DeserializerMax doesn't fit in scTagMask");
+
+static_assert(
+ DeserializerMax <= std::numeric_limits<Streaming::DeserializerTag>::max(),
+ "The maximum number of deserializers must fit in the DeserializerTag type");
+
+// Array of marker type functions.
+// 1-based, i.e.: [0] -> tag 1, [DeserializerMax - 1] -> tag DeserializerMax.
+// Elements are added at the next available atomically-incremented
+// `sDeserializerCount` (minus 1) whenever a new marker type is used in a
+// Firefox session; the content is kept between profiler runs in that session.
+// There is theoretically a race between the increment and the time the entry is
+// fully written, but in practice all new elements are written (during
+// profiling, using a marker type for the first time) long before they are read
+// (after profiling is paused).
+static Streaming::MarkerTypeFunctions
+ sMarkerTypeFunctions1Based[DeserializerMax];
+
+// Registers a new set of marker type functions and returns the 1-based tag
+// under which they are stored in sMarkerTypeFunctions1Based.
+// All three functions must be non-null (release-asserted), and the number of
+// registered tags must not exceed DeserializerMax (release-asserted).
+// The registration is done under a shared lock on sDeserializerCount; it
+// busy-waits while an exclusive lock is held.
+/* static */ Streaming::DeserializerTag Streaming::TagForMarkerTypeFunctions(
+ Streaming::MarkerDataDeserializer aDeserializer,
+ Streaming::MarkerTypeNameFunction aMarkerTypeNameFunction,
+ Streaming::MarkerSchemaFunction aMarkerSchemaFunction) {
+ MOZ_RELEASE_ASSERT(!!aDeserializer);
+ MOZ_RELEASE_ASSERT(!!aMarkerTypeNameFunction);
+ MOZ_RELEASE_ASSERT(!!aMarkerSchemaFunction);
+
+ // Add a shared lock request, which will prevent future exclusive locking.
+ DeserializerTagAtomic tagWithLock = (sDeserializerCount += scSharedLockUnit);
+
+ // An exclusive locker may have arrived before us, just wait for it to finish.
+ while ((tagWithLock & scExclusiveLock) != 0u) {
+ tagWithLock = sDeserializerCount;
+ }
+
+ MOZ_ASSERT(
+ // This is equivalent to shifting right to only keep the lock counts.
+ tagWithLock / scSharedLockUnit <
+ // This is effectively half of the permissible shared lock range,
+ // that would mean way too many threads doing this work here!
+ scExclusiveLock / scSharedLockUnit / 2,
+ "The shared lock count is getting unexpectedly high, verify the "
+ "algorithm, and tweak constants if needed");
+
+ // Reserve a tag. Even if there are multiple shared-lock holders here, each
+ // one will get a different value, and therefore will access a different part
+ // of the sMarkerTypeFunctions1Based array.
+ const DeserializerTagAtomic tag = ++sDeserializerCount & scTagMask;
+
+ MOZ_RELEASE_ASSERT(
+ tag <= DeserializerMax,
+ "Too many deserializers, consider increasing DeserializerMax. "
+ "Or is a deserializer stored again and again?");
+ // Tags are 1-based, the array is 0-based.
+ sMarkerTypeFunctions1Based[tag - 1] = {aDeserializer, aMarkerTypeNameFunction,
+ aMarkerSchemaFunction};
+
+ // And release our shared lock, to allow exclusive readers.
+ sDeserializerCount -= scSharedLockUnit;
+
+ return static_cast<DeserializerTag>(tag);
+}
+
+// Returns the marker data deserializer that was registered under aTag.
+// aTag is a 1-based index into sMarkerTypeFunctions1Based, and must be within
+// [1, number of registered tags] (release-asserted).
+// sDeserializerCount also carries lock bits in its high bits; they must be
+// masked out before comparing, otherwise the range check would trivially pass
+// for any tag whenever a shared or exclusive lock is held.
+/* static */ Streaming::MarkerDataDeserializer Streaming::DeserializerForTag(
+    Streaming::DeserializerTag aTag) {
+  const DeserializerTagAtomic registeredCount = sDeserializerCount & scTagMask;
+  MOZ_RELEASE_ASSERT(
+      aTag > 0 && static_cast<DeserializerTagAtomic>(aTag) <= registeredCount,
+      "Out-of-range tag value");
+  return sMarkerTypeFunctions1Based[aTag - 1].mMarkerDataDeserializer;
+}
+
+// Takes the exclusive lock on sDeserializerCount, and captures a span over the
+// currently-registered marker type functions.
+// This busy-loops until no lock bit is set and the compare-exchange that sets
+// the exclusive-lock bit succeeds.
+Streaming::LockedMarkerTypeFunctionsList::LockedMarkerTypeFunctionsList() {
+ for (;;) {
+ const DeserializerTagAtomic count = sDeserializerCount;
+ if ((count & scTagMask) != count) {
+ // Someone already has a lock, loop around.
+ continue;
+ }
+
+ // There are currently no locks, try to add our exclusive lock.
+ if (!sDeserializerCount.compareExchange(count, count | scExclusiveLock)) {
+ // Someone else modified sDeserializerCount since our read, loop around.
+ continue;
+ }
+
+ // We applied our exclusive lock, we can now read the list of functions,
+ // without interference until ~LockedMarkerTypeFunctionsList().
+ // (Note that sDeserializerCount may receive shared lock requests, but the
+ // count won't change.)
+ // At this point `count` has no lock bits, so it is the number of entries.
+ mMarkerTypeFunctionsSpan = {sMarkerTypeFunctions1Based, count};
+ break;
+ }
+}
+
+// Releases the exclusive lock taken by the constructor, by clearing the
+// exclusive-lock bit. In debug builds, first verifies that the lock bit is
+// still set and that the count has not changed since construction.
+Streaming::LockedMarkerTypeFunctionsList::~LockedMarkerTypeFunctionsList() {
+ MOZ_ASSERT(
+ (sDeserializerCount & scExclusiveLock) == scExclusiveLock,
+ "sDeserializerCount should still have the the exclusive lock bit set");
+ MOZ_ASSERT(
+ (sDeserializerCount & scTagMask) ==
+ DeserializerTagAtomic(mMarkerTypeFunctionsSpan.size()),
+ "sDeserializerCount should have the same count since construction");
+ sDeserializerCount &= ~scExclusiveLock;
+}
+
+// Only accessed on the main thread.
+// Both profilers (Base and Gecko) could be active at the same time, so keep a
+// ref-count to only allocate at most one buffer at any time.
+// The count is incremented by EnsureBufferForMainThreadAddMarker() and
+// decremented by ReleaseBufferForMainThreadAddMarker(); the buffer exists
+// while the count is non-zero.
+static int sBufferForMainThreadAddMarkerRefCount = 0;
+static ProfileChunkedBuffer* sBufferForMainThreadAddMarker = nullptr;
+
+// Returns the shared main-thread buffer after clearing it, or nullptr when
+// called off the main thread or when the buffer does not currently exist.
+ProfileChunkedBuffer* GetClearedBufferForMainThreadAddMarker() {
+  if (!mozilla::baseprofiler::profiler_is_main_thread()) {
+    return nullptr;
+  }
+
+  ProfileChunkedBuffer* const buffer = sBufferForMainThreadAddMarker;
+  if (!buffer) {
+    // No buffer has been allocated (or it has been released).
+    return nullptr;
+  }
+
+  MOZ_ASSERT(buffer->IsInSession(),
+             "sBufferForMainThreadAddMarker should always be in-session");
+  buffer->Clear();
+  MOZ_ASSERT(
+      buffer->IsInSession(),
+      "Cleared sBufferForMainThreadAddMarker should still be in-session");
+  return buffer;
+}
+
+// Adds one reference to the shared main-thread buffer, allocating it on the
+// first reference. Does nothing when called off the main thread.
+MFBT_API void EnsureBufferForMainThreadAddMarker() {
+  if (!mozilla::baseprofiler::profiler_is_main_thread()) {
+    return;
+  }
+
+  const bool isFirstEnsure = (sBufferForMainThreadAddMarkerRefCount == 0);
+  ++sBufferForMainThreadAddMarkerRefCount;
+  if (!isFirstEnsure) {
+    // The buffer already exists, we only needed to count this reference.
+    return;
+  }
+
+  // First `Ensure`, allocate the buffer.
+  MOZ_ASSERT(!sBufferForMainThreadAddMarker);
+  sBufferForMainThreadAddMarker = new ProfileChunkedBuffer(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+      MakeUnique<ProfileBufferChunkManagerSingle>(
+          ProfileBufferChunkManager::scExpectedMaximumStackSize));
+  MOZ_ASSERT(sBufferForMainThreadAddMarker);
+  MOZ_ASSERT(sBufferForMainThreadAddMarker->IsInSession());
+}
+
+// Drops one reference to the shared main-thread buffer, destroying it when the
+// last reference goes away. Does nothing when called off the main thread, or
+// when there is no outstanding reference.
+MFBT_API void ReleaseBufferForMainThreadAddMarker() {
+  if (!mozilla::baseprofiler::profiler_is_main_thread()) {
+    return;
+  }
+
+  if (sBufferForMainThreadAddMarkerRefCount == 0) {
+    // Unexpected Release! This should not normally happen, but it's harmless in
+    // practice, it means the buffer is not alive anyway.
+    return;
+  }
+
+  MOZ_ASSERT(sBufferForMainThreadAddMarker);
+  MOZ_ASSERT(sBufferForMainThreadAddMarker->IsInSession());
+
+  --sBufferForMainThreadAddMarkerRefCount;
+  if (sBufferForMainThreadAddMarkerRefCount != 0) {
+    // Still referenced, keep the buffer alive.
+    return;
+  }
+
+  // Last `Release`, destroy the buffer.
+  delete sBufferForMainThreadAddMarker;
+  sBufferForMainThreadAddMarker = nullptr;
+}
+
+} // namespace base_profiler_markers_detail
+
+// Writes this schema as one JSON object element into aWriter, with aName as
+// its "name" property.
+// Nothing is written when mLocations is empty.
+// The object contains, in order: "name", the optional "chartLabel",
+// "tooltipLabel" and "tableLabel" (only when non-empty), the "display" array
+// (one string per location), the "data" array (one object per data row), and
+// the optional "graphs" array (only when mGraphs is non-empty).
+// This function is rvalue-ref-qualified (`&&`).
+void MarkerSchema::Stream(JSONWriter& aWriter,
+ const Span<const char>& aName) && {
+ // The caller should have started a JSON array, in which we can add an object
+ // that defines a marker schema.
+
+ if (mLocations.empty()) {
+ // SpecialFrontendLocation case, don't output anything for this type.
+ return;
+ }
+
+ aWriter.StartObjectElement();
+ {
+ aWriter.StringProperty("name", aName);
+
+ if (!mChartLabel.empty()) {
+ aWriter.StringProperty("chartLabel", mChartLabel);
+ }
+
+ if (!mTooltipLabel.empty()) {
+ aWriter.StringProperty("tooltipLabel", mTooltipLabel);
+ }
+
+ if (!mTableLabel.empty()) {
+ aWriter.StringProperty("tableLabel", mTableLabel);
+ }
+
+ aWriter.StartArrayProperty("display");
+ {
+ for (Location location : mLocations) {
+ aWriter.StringElement(LocationToStringSpan(location));
+ }
+ }
+ aWriter.EndArray();
+
+ aWriter.StartArrayProperty("data");
+ {
+ for (const DataRow& row : mData) {
+ aWriter.StartObjectElement();
+ {
+ // A row is either dynamic data (key/format, with optional label and
+ // searchable flag) or static data (label/value).
+ row.match(
+ [&aWriter](const DynamicData& aData) {
+ aWriter.StringProperty("key", aData.mKey);
+ if (aData.mLabel) {
+ aWriter.StringProperty("label", *aData.mLabel);
+ }
+ aWriter.StringProperty("format",
+ FormatToStringSpan(aData.mFormat));
+ if (aData.mSearchable) {
+ aWriter.BoolProperty(
+ "searchable",
+ *aData.mSearchable == Searchable::Searchable);
+ }
+ },
+ [&aWriter](const StaticData& aStaticData) {
+ aWriter.StringProperty("label", aStaticData.mLabel);
+ aWriter.StringProperty("value", aStaticData.mValue);
+ });
+ }
+ aWriter.EndObject();
+ }
+ }
+ aWriter.EndArray();
+
+ if (!mGraphs.empty()) {
+ aWriter.StartArrayProperty("graphs");
+ {
+ for (const GraphData& graph : mGraphs) {
+ aWriter.StartObjectElement();
+ {
+ aWriter.StringProperty("key", graph.mKey);
+ aWriter.StringProperty("type", GraphTypeToStringSpan(graph.mType));
+ if (graph.mColor) {
+ aWriter.StringProperty("color",
+ GraphColorToStringSpan(*graph.mColor));
+ }
+ }
+ aWriter.EndObject();
+ }
+ }
+ aWriter.EndArray();
+ }
+ }
+ aWriter.EndObject();
+}
+
+/* static */
+// Maps a Location to the string written in the schema's "display" array.
+// Crashes on an unexpected enum value.
+Span<const char> MarkerSchema::LocationToStringSpan(
+    MarkerSchema::Location aLocation) {
+  const char* locationName = nullptr;
+  switch (aLocation) {
+    case Location::MarkerChart:
+      locationName = "marker-chart";
+      break;
+    case Location::MarkerTable:
+      locationName = "marker-table";
+      break;
+    case Location::TimelineOverview:
+      locationName = "timeline-overview";
+      break;
+    case Location::TimelineMemory:
+      locationName = "timeline-memory";
+      break;
+    case Location::TimelineIPC:
+      locationName = "timeline-ipc";
+      break;
+    case Location::TimelineFileIO:
+      locationName = "timeline-fileio";
+      break;
+    case Location::StackChart:
+      locationName = "stack-chart";
+      break;
+    default:
+      MOZ_CRASH("Unexpected Location enum");
+      return {};
+  }
+  return mozilla::MakeStringSpan(locationName);
+}
+
+/* static */
+// Maps a Format to the string written as a data row's "format" property.
+// Crashes on an unexpected enum value.
+Span<const char> MarkerSchema::FormatToStringSpan(
+    MarkerSchema::Format aFormat) {
+  const char* formatName = nullptr;
+  switch (aFormat) {
+    case Format::Url:
+      formatName = "url";
+      break;
+    case Format::FilePath:
+      formatName = "file-path";
+      break;
+    case Format::String:
+      formatName = "string";
+      break;
+    case Format::UniqueString:
+      formatName = "unique-string";
+      break;
+    case Format::Duration:
+      formatName = "duration";
+      break;
+    case Format::Time:
+      formatName = "time";
+      break;
+    case Format::Seconds:
+      formatName = "seconds";
+      break;
+    case Format::Milliseconds:
+      formatName = "milliseconds";
+      break;
+    case Format::Microseconds:
+      formatName = "microseconds";
+      break;
+    case Format::Nanoseconds:
+      formatName = "nanoseconds";
+      break;
+    case Format::Bytes:
+      formatName = "bytes";
+      break;
+    case Format::Percentage:
+      formatName = "percentage";
+      break;
+    case Format::Integer:
+      formatName = "integer";
+      break;
+    case Format::Decimal:
+      formatName = "decimal";
+      break;
+    default:
+      MOZ_CRASH("Unexpected Format enum");
+      return {};
+  }
+  return mozilla::MakeStringSpan(formatName);
+}
+
+/* static */
+// Maps a GraphType to the string written as a graph's "type" property.
+// Crashes on an unexpected enum value.
+Span<const char> MarkerSchema::GraphTypeToStringSpan(
+    MarkerSchema::GraphType aType) {
+  const char* typeName = nullptr;
+  switch (aType) {
+    case GraphType::Line:
+      typeName = "line";
+      break;
+    case GraphType::Bar:
+      typeName = "bar";
+      break;
+    case GraphType::FilledLine:
+      typeName = "line-filled";
+      break;
+    default:
+      MOZ_CRASH("Unexpected GraphType enum");
+      return {};
+  }
+  return mozilla::MakeStringSpan(typeName);
+}
+
+/* static */
+// Maps a GraphColor to the string written as a graph's "color" property.
+// Crashes on an unexpected enum value.
+Span<const char> MarkerSchema::GraphColorToStringSpan(
+    MarkerSchema::GraphColor aColor) {
+  const char* colorName = nullptr;
+  switch (aColor) {
+    case GraphColor::Blue:
+      colorName = "blue";
+      break;
+    case GraphColor::Green:
+      colorName = "green";
+      break;
+    case GraphColor::Grey:
+      colorName = "grey";
+      break;
+    case GraphColor::Ink:
+      colorName = "ink";
+      break;
+    case GraphColor::Magenta:
+      colorName = "magenta";
+      break;
+    case GraphColor::Orange:
+      colorName = "orange";
+      break;
+    case GraphColor::Purple:
+      colorName = "purple";
+      break;
+    case GraphColor::Red:
+      colorName = "red";
+      break;
+    case GraphColor::Teal:
+      colorName = "teal";
+      break;
+    case GraphColor::Yellow:
+      colorName = "yellow";
+      break;
+    default:
+      MOZ_CRASH("Unexpected GraphColor enum");
+      return {};
+  }
+  return mozilla::MakeStringSpan(colorName);
+}
+
+} // namespace mozilla
+
+// Explicit instantiations, exported with MFBT_API, of the marker-adding
+// templates for the TextMarker (std::string payload) and NoPayload marker
+// types.
+namespace mozilla::baseprofiler {
+template MFBT_API ProfileBufferBlockIndex AddMarker(const ProfilerString8View&,
+ const MarkerCategory&,
+ MarkerOptions&&,
+ markers::TextMarker,
+ const std::string&);
+
+template MFBT_API ProfileBufferBlockIndex
+AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&,
+ const MarkerCategory&, MarkerOptions&&, markers::NoPayload);
+
+template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer(
+ ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&,
+ MarkerOptions&&, markers::TextMarker, const std::string&);
+} // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfilerUtils.cpp b/mozglue/baseprofiler/core/ProfilerUtils.cpp
new file mode 100644
index 0000000000..5f53910774
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerUtils.cpp
@@ -0,0 +1,162 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file implements functions from BaseProfilerUtils.h on all platforms.
+// Functions with platform-specific implementations are separated in #if blocks
+// below, with each block being self-contained with all the #includes and
+// definitions it needs, to keep platform code easier to maintain in isolation.
+
+#include "mozilla/BaseProfilerUtils.h"
+
+// --------------------------------------------- WASI process & thread ids
+#if defined(__wasi__)
+
+namespace mozilla::baseprofiler {
+
+// WASI currently has exactly one process and one thread, so both ids are
+// fixed constants.
+
+BaseProfilerProcessId profiler_current_process_id() {
+  constexpr unsigned kOnlyProcessId = 1u;
+  return BaseProfilerProcessId::FromNativeId(kOnlyProcessId);
+}
+
+BaseProfilerThreadId profiler_current_thread_id() {
+  constexpr unsigned kOnlyThreadId = 1u;
+  return BaseProfilerThreadId::FromNativeId(kOnlyThreadId);
+}
+
+}  // namespace mozilla::baseprofiler
+
+// --------------------------------------------- Windows process & thread ids
+#elif defined(XP_WIN)
+
+# include <process.h>
+# include <processthreadsapi.h>
+
+namespace mozilla::baseprofiler {
+
+// Process id as reported by the CRT's _getpid().
+BaseProfilerProcessId profiler_current_process_id() {
+  const auto nativePid = _getpid();
+  return BaseProfilerProcessId::FromNativeId(nativePid);
+}
+
+// Thread id as reported by GetCurrentThreadId(); the native id type is
+// checked at compile time to be exactly that function's return type.
+BaseProfilerThreadId profiler_current_thread_id() {
+  using WinThreadIdType = decltype(GetCurrentThreadId());
+  static_assert(
+      std::is_same_v<BaseProfilerThreadId::NativeType, WinThreadIdType>,
+      "BaseProfilerThreadId::NativeType must be exactly the type "
+      "returned by GetCurrentThreadId()");
+  const WinThreadIdType nativeTid = GetCurrentThreadId();
+  return BaseProfilerThreadId::FromNativeId(nativeTid);
+}
+
+}  // namespace mozilla::baseprofiler
+
+// --------------------------------------------- Non-Windows process id
+#else
+// All non-Windows platforms are assumed to be POSIX, which has getpid().
+
+# include <unistd.h>
+
+namespace mozilla::baseprofiler {
+
+// Process id as reported by getpid().
+BaseProfilerProcessId profiler_current_process_id() {
+  const auto nativePid = getpid();
+  return BaseProfilerProcessId::FromNativeId(nativePid);
+}
+
+}  // namespace mozilla::baseprofiler
+
+// --------------------------------------------- Non-Windows thread id
+// ------------------------------------------------------- macOS
+# if defined(XP_MACOSX)
+
+# include <pthread.h>
+
+namespace mozilla::baseprofiler {
+
+// Thread id obtained from pthread_threadid_np() for the calling thread; a
+// default-constructed id is returned when that call reports failure.
+BaseProfilerThreadId profiler_current_thread_id() {
+  uint64_t nativeTid;
+  const bool gotId = pthread_threadid_np(nullptr, &nativeTid) == 0;
+  return gotId ? BaseProfilerThreadId::FromNativeId(nativeTid)
+               : BaseProfilerThreadId{};
+}
+
+}  // namespace mozilla::baseprofiler
+
+// ------------------------------------------------------- Android
+// Test Android before Linux, because Linux includes Android.
+# elif defined(__ANDROID__) || defined(ANDROID)
+
+namespace mozilla::baseprofiler {
+
+// Thread id as reported by gettid().
+BaseProfilerThreadId profiler_current_thread_id() {
+  const auto nativeTid = gettid();
+  return BaseProfilerThreadId::FromNativeId(nativeTid);
+}
+
+}  // namespace mozilla::baseprofiler
+
+// ------------------------------------------------------- Linux
+# elif defined(XP_LINUX)
+
+# include <sys/syscall.h>
+
+namespace mozilla::baseprofiler {
+
+// Thread id obtained through the raw SYS_gettid system call.
+BaseProfilerThreadId profiler_current_thread_id() {
+  // The syscall is issued directly because glibc only gained a gettid()
+  // wrapper in 2.30.
+  const auto rawTid = syscall(SYS_gettid);
+  return BaseProfilerThreadId::FromNativeId(rawTid);
+}
+
+}  // namespace mozilla::baseprofiler
+
+// ------------------------------------------------------- FreeBSD
+# elif defined(XP_FREEBSD)
+
+# include <sys/thr.h>
+
+namespace mozilla::baseprofiler {
+
+// Thread id obtained from thr_self(); a default-constructed id is returned
+// when that call reports failure.
+BaseProfilerThreadId profiler_current_thread_id() {
+  long nativeTid;
+  const bool gotId = thr_self(&nativeTid) == 0;
+  return gotId ? BaseProfilerThreadId::FromNativeId(nativeTid)
+               : BaseProfilerThreadId{};
+}
+
+}  // namespace mozilla::baseprofiler
+
+// ------------------------------------------------------- Others
+# else
+
+namespace mozilla::baseprofiler {
+
+// Fallback for platforms without a dedicated branch: use the standard
+// library's id for the calling thread.
+BaseProfilerThreadId profiler_current_thread_id() {
+  const auto nativeTid = std::this_thread::get_id();
+  return BaseProfilerThreadId::FromNativeId(nativeTid);
+}
+
+}  // namespace mozilla::baseprofiler
+
+# endif
+#endif // End of non-XP_WIN.
+
+// --------------------------------------------- Platform-agnostic definitions
+
+namespace mozilla::baseprofiler {
+
+// Id recorded for the main thread. It is default-constructed here and only
+// assigned by profiler_init_main_thread_id().
+static BaseProfilerThreadId scBaseProfilerMainThreadId{};
+
+// Records the calling thread's id as the main thread id, unless an id has
+// already been recorded.
+void profiler_init_main_thread_id() {
+  if (scBaseProfilerMainThreadId.IsSpecified()) {
+    return;
+  }
+  scBaseProfilerMainThreadId = profiler_current_thread_id();
+}
+
+// Returns whatever id is currently recorded for the main thread.
+BaseProfilerThreadId profiler_main_thread_id() {
+  return scBaseProfilerMainThreadId;
+}
+
+// True when the calling thread's id equals the recorded main thread id.
+bool profiler_is_main_thread() {
+  auto currentThreadId = profiler_current_thread_id();
+  return currentThreadId == scBaseProfilerMainThreadId;
+}
+
+}  // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfilingCategory.cpp b/mozglue/baseprofiler/core/ProfilingCategory.cpp
new file mode 100644
index 0000000000..8ff2b15555
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilingCategory.cpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// ProfilingSubcategory_X:
+// One enum for each category X, listing that category's subcategories. This
+// allows the sProfilingCategoryPairInfo macro construction below to look up a
+// per-category index for a subcategory.
+//
+// The three helper macros emit, respectively: the opening of
+// `enum class ProfilingSubcategory_<name> : uint32_t {`, one enumerator per
+// subcategory, and the closing `};`. They are handed to
+// MOZ_PROFILING_CATEGORY_LIST and #undef'd immediately afterwards.
+#define SUBCATEGORY_ENUMS_BEGIN_CATEGORY(name, labelAsString, color) \
+ enum class ProfilingSubcategory_##name : uint32_t {
+#define SUBCATEGORY_ENUMS_SUBCATEGORY(category, name, labelAsString) \
+ name,
+#define SUBCATEGORY_ENUMS_END_CATEGORY \
+ };
+MOZ_PROFILING_CATEGORY_LIST(SUBCATEGORY_ENUMS_BEGIN_CATEGORY,
+ SUBCATEGORY_ENUMS_SUBCATEGORY,
+ SUBCATEGORY_ENUMS_END_CATEGORY)
+#undef SUBCATEGORY_ENUMS_BEGIN_CATEGORY
+#undef SUBCATEGORY_ENUMS_SUBCATEGORY
+#undef SUBCATEGORY_ENUMS_END_CATEGORY
+
+// sProfilingCategoryPairInfo:
+// A list of ProfilingCategoryPairInfos with the same order as
+// ProfilingCategoryPair, which can be used to map a ProfilingCategoryPair to
+// its information.
+//
+// Only the SUBCATEGORY helper emits anything here: the BEGIN/END helpers
+// expand to nothing, so the array receives one initializer per subcategory,
+// made of the category, the subcategory's index within its
+// ProfilingSubcategory_<category> enum, and its label string.
+#define CATEGORY_INFO_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_INFO_SUBCATEGORY(category, name, labelAsString) \
+ {ProfilingCategory::category, \
+ uint32_t(ProfilingSubcategory_##category::name), labelAsString},
+#define CATEGORY_INFO_END_CATEGORY
+const ProfilingCategoryPairInfo sProfilingCategoryPairInfo[] = {
+ MOZ_PROFILING_CATEGORY_LIST(CATEGORY_INFO_BEGIN_CATEGORY,
+ CATEGORY_INFO_SUBCATEGORY,
+ CATEGORY_INFO_END_CATEGORY)
+};
+#undef CATEGORY_INFO_BEGIN_CATEGORY
+#undef CATEGORY_INFO_SUBCATEGORY
+#undef CATEGORY_INFO_END_CATEGORY
+
+// clang-format on
+
+// Returns the sProfilingCategoryPairInfo entry for aCategoryPair, using the
+// enumerator's numeric value as the array index.
+const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo(
+ ProfilingCategoryPair aCategoryPair) {
+ // Compile-time check that the table has exactly COUNT entries.
+ static_assert(
+ MOZ_ARRAY_LENGTH(sProfilingCategoryPairInfo) ==
+ uint32_t(ProfilingCategoryPair::COUNT),
+ "sProfilingCategoryPairInfo and ProfilingCategory need to have the "
+ "same order and the same length");
+
+ uint32_t categoryPairIndex = uint32_t(aCategoryPair);
+ // Release-mode bounds check: an index past LAST must never reach the array
+ // access below.
+ MOZ_RELEASE_ASSERT(categoryPairIndex <=
+ uint32_t(ProfilingCategoryPair::LAST));
+ return sProfilingCategoryPairInfo[categoryPairIndex];
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfilingStack.cpp b/mozglue/baseprofiler/core/ProfilingStack.cpp
new file mode 100644
index 0000000000..f5cd2ddd04
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilingStack.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilingStack.h"
+
+#include <algorithm>
+
+#include "mozilla/IntegerRange.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/UniquePtrExtensions.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Releases the frame array. The stack must be empty (stackPointer == 0) when
+// it is destroyed; this is enforced in release builds too.
+ProfilingStack::~ProfilingStack() {
+ // The label macros keep a reference to the ProfilingStack to avoid a TLS
+ // access. If these are somehow not all cleared we will get a
+ // use-after-free so better to crash now.
+ MOZ_RELEASE_ASSERT(stackPointer == 0);
+
+ delete[] frames;
+}
+
+// Replaces `frames` with a larger array. Expected to be called only when
+// stackPointer has reached or passed `capacity` (asserted in debug builds).
+void ProfilingStack::ensureCapacitySlow() {
+ MOZ_ASSERT(stackPointer >= capacity);
+ const uint32_t kInitialCapacity = 128;
+
+ uint32_t sp = stackPointer;
+ // Double the current capacity (or start at kInitialCapacity when it is
+ // zero), but never end up with fewer than sp + 1 slots.
+ auto newCapacity =
+ std::max(sp + 1, capacity ? capacity * 2 : kInitialCapacity);
+
+ auto* newFrames = new ProfilingStackFrame[newCapacity];
+
+ // It's important that `frames` / `capacity` / `stackPointer` remain
+ // consistent here at all times.
+ // Copy the first `capacity` frames into the new array before it is
+ // published.
+ for (auto i : IntegerRange(capacity)) {
+ newFrames[i] = frames[i];
+ }
+
+ // Publish the new array first, then the larger capacity, and only then
+ // free the old array.
+ ProfilingStackFrame* oldFrames = frames;
+ frames = newFrames;
+ capacity = newCapacity;
+ delete[] oldFrames;
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/RegisteredThread.cpp b/mozglue/baseprofiler/core/RegisteredThread.cpp
new file mode 100644
index 0000000000..85a7fc2c6d
--- /dev/null
+++ b/mozglue/baseprofiler/core/RegisteredThread.cpp
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "RegisteredThread.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Builds the per-thread record from aInfo: the racy state and the platform
+// data are both keyed on aInfo's thread id, and aStackTop is stored as given
+// (except on Darwin, see below).
+RegisteredThread::RegisteredThread(ThreadInfo* aInfo, void* aStackTop)
+    : mRacyRegisteredThread(aInfo->ThreadId()),
+      mPlatformData(AllocPlatformData(aInfo->ThreadId())),
+      mStackTop(aStackTop),
+      mThreadInfo(aInfo) {
+#if defined(GP_OS_darwin)
+  // No guessing needed on mac: replace the caller-supplied value with the
+  // stack address pthreads reports for the calling thread.
+  mStackTop = pthread_get_stackaddr_np(pthread_self());
+#endif
+}
+
+RegisteredThread::~RegisteredThread() = default;
+
+// Reports the malloc'd size of this object itself, as measured by
+// aMallocSizeOf. Nothing reachable through its members is added.
+size_t RegisteredThread::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  // Not included in the total:
+  //   - mPlatformData: may be measured later if DMD finds it is worthwhile.
+  //   - mThreadInfo: not measured because it is non-owning.
+  return aMallocSizeOf(this);
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/RegisteredThread.h b/mozglue/baseprofiler/core/RegisteredThread.h
new file mode 100644
index 0000000000..baaf7dced6
--- /dev/null
+++ b/mozglue/baseprofiler/core/RegisteredThread.h
@@ -0,0 +1,164 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RegisteredThread_h
+#define RegisteredThread_h
+
+#include "platform.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// This class contains the state for a single thread that is accessible without
+// protection from gPSMutex in platform.cpp. Because there is no external
+// protection against data races, it must provide internal protection. Hence
+// the "Racy" prefix.
+//
+class RacyRegisteredThread final {
+ public:
+ // Starts out awake and not being profiled.
+ explicit RacyRegisteredThread(BaseProfilerThreadId aThreadId)
+ : mThreadId(aThreadId), mSleep(AWAKE), mIsBeingProfiled(false) {}
+
+ ~RacyRegisteredThread() {}
+
+ void SetIsBeingProfiled(bool aIsBeingProfiled) {
+ mIsBeingProfiled = aIsBeingProfiled;
+ }
+
+ bool IsBeingProfiled() const { return mIsBeingProfiled; }
+
+ // This is called on every profiler restart. Put things that should happen at
+ // that time here.
+ void ReinitializeOnResume() {
+ // This is needed to cause an initial sample to be taken from sleeping
+ // threads that had been observed prior to the profiler stopping and
+ // restarting. Otherwise sleeping threads would not have any samples to
+ // copy forward while sleeping.
+ // The result is deliberately ignored: if the thread is not in the
+ // SLEEPING_OBSERVED state there is nothing to reset.
+ (void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED);
+ }
+
+ // This returns true for the second and subsequent calls in each sleep cycle.
+ bool CanDuplicateLastSampleDueToSleep() {
+ if (mSleep == AWAKE) {
+ return false;
+ }
+
+ // First observation of this sleep cycle: mark it as observed, but still
+ // ask for a real sample.
+ if (mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ // Call this whenever the current thread sleeps. Calling it twice in a row
+ // without an intervening SetAwake() call is an error.
+ void SetSleeping() {
+ MOZ_ASSERT(mSleep == AWAKE);
+ mSleep = SLEEPING_NOT_OBSERVED;
+ }
+
+ // Call this whenever the current thread wakes. Calling it twice in a row
+ // without an intervening SetSleeping() call is an error.
+ void SetAwake() {
+ MOZ_ASSERT(mSleep != AWAKE);
+ mSleep = AWAKE;
+ }
+
+ // True in both sleeping states (observed or not).
+ bool IsSleeping() { return mSleep != AWAKE; }
+
+ BaseProfilerThreadId ThreadId() const { return mThreadId; }
+
+ class ProfilingStack& ProfilingStack() { return mProfilingStack; }
+ const class ProfilingStack& ProfilingStack() const { return mProfilingStack; }
+
+ private:
+ class ProfilingStack mProfilingStack;
+
+ // mThreadId contains the thread ID of the current thread. It is safe to read
+ // this from multiple threads concurrently, as it will never be mutated.
+ const BaseProfilerThreadId mThreadId;
+
+ // mSleep tracks whether the thread is sleeping, and if so, whether it has
+ // been previously observed. This is used for an optimization: in some cases,
+ // when a thread is asleep, we duplicate the previous sample, which is
+ // cheaper than taking a new sample.
+ //
+ // mSleep is atomic because it is accessed from multiple threads.
+ //
+ // - It is assigned by this thread, via SetSleeping() and SetAwake().
+ //
+ // - It is also updated, through compareExchange(), by
+ // CanDuplicateLastSampleDueToSleep() and ReinitializeOnResume().
+ //
+ // - It is read by SamplerThread::Run().
+ //
+ // There are two cases where racing between threads can cause an issue.
+ //
+ // - If CanDuplicateLastSampleDueToSleep() returns false but that result is
+ // invalidated before being acted upon, we will take a full sample
+ // unnecessarily. This is additional work but won't cause any correctness
+ // issues. (In actual fact, this case is impossible. In order to go from
+ // CanDuplicateLastSampleDueToSleep() returning false to it returning true
+ // requires an intermediate call to it in order for mSleep to go from
+ // SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.)
+ //
+ // - If CanDuplicateLastSampleDueToSleep() returns true but that result is
+ // invalidated before being acted upon -- i.e. the thread wakes up before
+ // DuplicateLastSample() is called -- we will duplicate the previous
+ // sample. This is inaccurate, but only slightly... we will effectively
+ // treat the thread as having slept a tiny bit longer than it really did.
+ //
+ // This latter inaccuracy could be avoided by moving the
+ // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code,
+ // e.g. the section where Tick() is called. But that would reduce the
+ // effectiveness of the optimization because more code would have to be run
+ // before we can tell that duplication is allowed.
+ //
+ static const int AWAKE = 0;
+ static const int SLEEPING_NOT_OBSERVED = 1;
+ static const int SLEEPING_OBSERVED = 2;
+ Atomic<int> mSleep;
+
+ // Is this thread being profiled? (e.g., should markers be recorded?)
+ Atomic<bool, MemoryOrdering::Relaxed> mIsBeingProfiled;
+};
+
+// This class contains information that's relevant to a single thread only
+// while that thread is running and registered with the profiler, but
+// regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+class RegisteredThread final {
+ public:
+ // aInfo supplies the thread id used for the racy state and the platform
+ // data; aStackTop is the initial value of StackTop().
+ RegisteredThread(ThreadInfo* aInfo, void* aStackTop);
+ ~RegisteredThread();
+
+ // Access to the part of the state that is usable without the lock.
+ class RacyRegisteredThread& RacyRegisteredThread() {
+ return mRacyRegisteredThread;
+ }
+ const class RacyRegisteredThread& RacyRegisteredThread() const {
+ return mRacyRegisteredThread;
+ }
+
+ // Non-owning pointer; the data stays owned by this object.
+ PlatformData* GetPlatformData() const { return mPlatformData.get(); }
+ const void* StackTop() const { return mStackTop; }
+
+ // Malloc'd size of this object, as measured by aMallocSizeOf.
+ size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+
+ // Returns a RefPtr copy of the ThreadInfo given at construction.
+ const RefPtr<ThreadInfo> Info() const { return mThreadInfo; }
+
+ private:
+ class RacyRegisteredThread mRacyRegisteredThread;
+
+ const UniquePlatformData mPlatformData;
+ const void* mStackTop;
+
+ const RefPtr<ThreadInfo> mThreadInfo;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // RegisteredThread_h
diff --git a/mozglue/baseprofiler/core/ThreadInfo.h b/mozglue/baseprofiler/core/ThreadInfo.h
new file mode 100644
index 0000000000..80211396ac
--- /dev/null
+++ b/mozglue/baseprofiler/core/ThreadInfo.h
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ThreadInfo_h
+#define ThreadInfo_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/BaseProfilerUtils.h"
+#include "mozilla/TimeStamp.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// This class contains information about a thread which needs to be stored
+// across restarts of the profiler and which can be useful even after the
+// thread has stopped running.
+// It uses threadsafe refcounting and only contains immutable data.
+class ThreadInfo final {
+ public:
+ // aName is copied into an owned string. aThreadId must be a specified id
+ // (asserted in debug builds). aRegisterTime defaults to the time of the
+ // call. The reference count starts at zero.
+ ThreadInfo(const char* aName, BaseProfilerThreadId aThreadId,
+ bool aIsMainThread,
+ const TimeStamp& aRegisterTime = TimeStamp::Now())
+ : mName(aName),
+ mRegisterTime(aRegisterTime),
+ mThreadId(aThreadId),
+ mIsMainThread(aIsMainThread),
+ mRefCnt(0) {
+ MOZ_ASSERT(aThreadId.IsSpecified(),
+ "Given aThreadId should not be unspecified");
+ }
+
+ // Using hand-rolled ref-counting, because RefCounted.h macros don't produce
+ // the same code between mozglue and libxul, see bug 1536656.
+ MFBT_API void AddRef() const { ++mRefCnt; }
+ // Drops one reference and deletes the object when the count reaches zero.
+ MFBT_API void Release() const {
+ MOZ_ASSERT(int32_t(mRefCnt) > 0);
+ if (--mRefCnt == 0) {
+ delete this;
+ }
+ }
+
+ // The returned pointer refers to storage owned by this object.
+ const char* Name() const { return mName.c_str(); }
+ TimeStamp RegisterTime() const { return mRegisterTime; }
+ BaseProfilerThreadId ThreadId() const { return mThreadId; }
+ bool IsMainThread() const { return mIsMainThread; }
+
+ private:
+ const std::string mName;
+ const TimeStamp mRegisterTime;
+ const BaseProfilerThreadId mThreadId;
+ const bool mIsMainThread;
+
+ // Mutable so that AddRef()/Release() can be const.
+ mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // ThreadInfo_h
diff --git a/mozglue/baseprofiler/core/VTuneProfiler.cpp b/mozglue/baseprofiler/core/VTuneProfiler.cpp
new file mode 100644
index 0000000000..2911c39f08
--- /dev/null
+++ b/mozglue/baseprofiler/core/VTuneProfiler.cpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+# undef UNICODE
+# undef _UNICODE
+#endif
+
+#include "VTuneProfiler.h"
+
+#include <memory>
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Starts out null; nothing in this file currently assigns it (see the
+// commented-out code in Initialize()).
+VTuneProfiler* VTuneProfiler::mInstance = nullptr;
+
+// Currently a no-op: the detection logic below is commented out, so
+// mInstance is left untouched.
+void VTuneProfiler::Initialize() {
+ // This is just a 'dirty trick' to find out if the ittnotify DLL was found.
+ // If it wasn't, __itt_event_create always returns 0; otherwise it returns
+ // incrementing numbers. If the library was found this wastes 2 events, but
+ // that should be okay.
+ // TODO re-implement here if vtune is needed
+ // __itt_event testEvent =
+ // __itt_event_create("Test event", strlen("Test event"));
+ // testEvent = __itt_event_create("Test event 2", strlen("Test event 2"));
+
+ // if (testEvent) {
+ // mInstance = new VTuneProfiler();
+ // }
+}
+
+// Intentionally empty: there is currently nothing to tear down.
+void VTuneProfiler::Shutdown() {}
+
+// Currently a no-op; aName and aKind are unused. The commented-out code below
+// is the previous implementation, kept for reference.
+void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) {
+ // TODO re-implement here if vtune is needed
+ // std::string str(aName);
+
+ // auto iter = mStrings.find(str);
+
+ // __itt_event event;
+ // if (iter != mStrings.end()) {
+ // event = iter->second;
+ // } else {
+ // event = __itt_event_create(aName, str.length());
+ // mStrings.insert({str, event});
+ // }
+
+ // if (aKind == TRACING_INTERVAL_START || aKind == TRACING_EVENT) {
+ // // VTune will consider starts not matched with an end to be single point
+ // // in time events.
+ // __itt_event_start(event);
+ // } else {
+ // __itt_event_end(event);
+ // }
+}
+
+// Currently a no-op; aName is unused. The commented-out code below is the
+// previous implementation, kept for reference.
+void VTuneProfiler::RegisterThreadInternal(const char* aName) {
+ // TODO re-implement here if vtune is needed
+ // std::string str(aName);
+
+ // if (!str.compare("Main Thread (Base Profiler)")) {
+ // // Process main thread.
+ // switch (XRE_GetProcessType()) {
+ // case GeckoProcessType::GeckoProcessType_Default:
+ // __itt_thread_set_name("Main Process");
+ // break;
+ // case GeckoProcessType::GeckoProcessType_Content:
+ // __itt_thread_set_name("Content Process");
+ // break;
+ // case GeckoProcessType::GeckoProcessType_GMPlugin:
+ // __itt_thread_set_name("Plugin Process");
+ // break;
+ // case GeckoProcessType::GeckoProcessType_GPU:
+ // __itt_thread_set_name("GPU Process");
+ // break;
+ // default:
+ // __itt_thread_set_name("Unknown Process");
+ // }
+ // return;
+ // }
+ // __itt_thread_set_name(aName);
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/VTuneProfiler.h b/mozglue/baseprofiler/core/VTuneProfiler.h
new file mode 100644
index 0000000000..cf94ab7242
--- /dev/null
+++ b/mozglue/baseprofiler/core/VTuneProfiler.h
@@ -0,0 +1,84 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VTuneProfiler_h
+#define VTuneProfiler_h
+
+// The intent here is to add 0 overhead for regular users. In order to build
+// the VTune profiler code at all --enable-vtune-instrumentation needs to be
+// set as a build option. Even then, when none of the environment variables
+// is specified that allow us to find the ittnotify DLL, these functions
+// should be minimal overhead. When starting Firefox under VTune, these
+// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64
+// should be set to point at the ittnotify DLL.
+#ifndef MOZ_VTUNE_INSTRUMENTATION
+
+# define VTUNE_INIT()
+# define VTUNE_SHUTDOWN()
+
+# define VTUNE_TRACING(name, kind)
+# define VTUNE_REGISTER_THREAD(name)
+
+#else
+
+# include "BaseProfiler.h"
+
+// This is the regular Intel header, these functions are actually defined for
+// us inside js/src/vtune by an intel C file which actually dynamically resolves
+// them to the correct DLL. Through libxul these will 'magically' resolve.
+# include "vtune/ittnotify.h"
+
+# include <stddef.h>
+# include <unordered_map>
+# include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Thin static front-end for VTune instrumentation. The static entry points
+// forward to a single instance and do nothing while that instance is null.
+class VTuneProfiler {
+ public:
+  static void Initialize();
+  static void Shutdown();
+
+  enum TracingKind {
+    TRACING_EVENT,
+    TRACING_INTERVAL_START,
+    TRACING_INTERVAL_END,
+  };
+
+  // Forwards to the instance's TraceInternal(), if there is an instance.
+  static void Trace(const char* aName, TracingKind aKind) {
+    if (!mInstance) {
+      return;
+    }
+    mInstance->TraceInternal(aName, aKind);
+  }
+
+  // Forwards to the instance's RegisterThreadInternal(), if there is an
+  // instance.
+  static void RegisterThread(const char* aName) {
+    if (!mInstance) {
+      return;
+    }
+    mInstance->RegisterThreadInternal(aName);
+  }
+
+ private:
+  void TraceInternal(const char* aName, TracingKind aKind);
+  void RegisterThreadInternal(const char* aName);
+
+  // Stays null when the ittnotify DLL could not be found.
+  static VTuneProfiler* mInstance;
+
+  // Maps an event name to its __itt_event.
+  std::unordered_map<std::string, __itt_event> mStrings;
+};
+
+// Instrumentation entry points used when MOZ_VTUNE_INSTRUMENTATION is set.
+// The expansions are fully qualified so that, like the empty variants used
+// when instrumentation is disabled, they compile regardless of the namespace
+// the macro is invoked from.
+#  define VTUNE_INIT() ::mozilla::baseprofiler::VTuneProfiler::Initialize()
+#  define VTUNE_SHUTDOWN() ::mozilla::baseprofiler::VTuneProfiler::Shutdown()
+
+// `name` is a C string; `kind` is a VTuneProfiler::TracingKind value.
+#  define VTUNE_TRACING(name, kind) \
+    ::mozilla::baseprofiler::VTuneProfiler::Trace(name, kind)
+#  define VTUNE_REGISTER_THREAD(name) \
+    ::mozilla::baseprofiler::VTuneProfiler::RegisterThread(name)
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif
+
+#endif /* VTuneProfiler_h */
diff --git a/mozglue/baseprofiler/core/platform-linux-android.cpp b/mozglue/baseprofiler/core/platform-linux-android.cpp
new file mode 100644
index 0000000000..f21b0a0b7d
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-linux-android.cpp
@@ -0,0 +1,513 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this
+// software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// This file is used for both Linux and Android.
+
+#include <stdio.h>
+#include <math.h>
+
+#include <pthread.h>
+#if defined(GP_OS_freebsd)
+# include <sys/thr.h>
+#endif
+#include <semaphore.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <ucontext.h>
+// Ubuntu Dapper requires memory pages to be marked as
+// executable. Otherwise, OS raises an exception when executing code
+// in that page.
+#include <sys/types.h> // mmap & munmap
+#include <sys/mman.h> // mmap & munmap
+#include <sys/stat.h> // open
+#include <fcntl.h> // open
+#include <unistd.h> // sysconf
+#include <semaphore.h>
+#ifdef __GLIBC__
+# include <execinfo.h> // backtrace, backtrace_symbols
+#endif // def __GLIBC__
+#include <strings.h> // index
+#include <errno.h>
+#include <stdarg.h>
+
+#include "prenv.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/DebugOnly.h"
+
+#include <string.h>
+#include <list>
+
+using namespace mozilla;
+
+namespace mozilla {
+namespace baseprofiler {
+
+static int64_t MicrosecondsSince1970() {
+  struct timeval now;  // Filled in as whole seconds plus microseconds.
+  gettimeofday(&now, nullptr);
+  return int64_t(now.tv_usec) + int64_t(now.tv_sec) * 1000000;
+}
+
+void* GetStackTop(void* aGuess) { return aGuess; }
+
+static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
+ aRegs.mContext = aContext;  // Also keep the raw context pointer.
+ mcontext_t& mcontext = aContext->uc_mcontext;
+
+ // Extracting the sample from the context is extremely machine dependent.
+#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
+ aRegs.mEcx = reinterpret_cast<Address>(mcontext.gregs[REG_ECX]);
+ aRegs.mEdx = reinterpret_cast<Address>(mcontext.gregs[REG_EDX]);
+#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
+ aRegs.mR10 = reinterpret_cast<Address>(mcontext.gregs[REG_R10]);
+ aRegs.mR12 = reinterpret_cast<Address>(mcontext.gregs[REG_R12]);
+#elif defined(GP_PLAT_amd64_freebsd)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp);
+ aRegs.mR10 = reinterpret_cast<Address>(mcontext.mc_r10);
+ aRegs.mR12 = reinterpret_cast<Address>(mcontext.mc_r12);
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
+ aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
+ aRegs.mR7 = reinterpret_cast<Address>(mcontext.arm_r7);
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
+ aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
+ aRegs.mR11 = reinterpret_cast<Address>(mcontext.regs[11]);
+#elif defined(GP_PLAT_arm64_freebsd)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]);
+ aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr);
+ aRegs.mR11 = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[11]);
+#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android)
+ aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+ aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
+ aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
+ // Unlike the other platforms, only PC, SP and FP are captured for mips64.
+#else
+# error "bad platform"
+#endif
+}
+
+#if defined(GP_OS_android)
+# define SYS_tgkill __NR_tgkill
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+int tgkill(pid_t tgid, pid_t tid, int signalno) {
+ return syscall(SYS_tgkill, tgid, tid, signalno);
+}
+#endif
+
+#if defined(GP_OS_freebsd)
+# define tgkill thr_kill2
+#endif
+
+class PlatformData {  // Stateless in this file: no members are declared.
+ public:
+ explicit PlatformData(BaseProfilerThreadId aThreadId) {}  // aThreadId unused.
+
+ ~PlatformData() {}
+};
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The only way to reliably interrupt a Linux thread and inspect its register
+// and stack state is by sending a signal to it, and doing the work inside the
+// signal handler. But we don't want to run much code inside the signal
+// handler, since POSIX severely restricts what we can do in signal handlers.
+// So we use a system of semaphores to suspend the thread and allow the
+// sampler thread to do all the work of unwinding and copying out whatever
+// data it wants.
+//
+// A four-message protocol is used to reliably suspend and later resume the
+// thread to be sampled (the samplee):
+//
+// Sampler (signal sender) thread Samplee (thread to be sampled)
+//
+// Prepare the SigHandlerCoordinator
+// and point sSigHandlerCoordinator at it
+//
+// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
+// wait(mMessage2) Copy register state
+// into sSigHandlerCoordinator
+// <------ MSG 2 ----- post(mMessage2)
+// Samplee is now suspended. wait(mMessage3)
+// Examine its stack/register
+// state at leisure
+//
+// Release samplee:
+// post(mMessage3) ------- MSG 3 ----->
+// wait(mMessage4) Samplee now resumes. Tell
+// the sampler that we are done.
+// <------ MSG 4 ------ post(mMessage4)
+// Now we know the samplee's signal (leave signal handler)
+// handler has finished using
+// sSigHandlerCoordinator. We can
+// safely reuse it for some other thread.
+//
+
+// A type used to coordinate between the sampler (signal sending) thread and
+// the thread currently being sampled (the samplee, which receives the
+// signals).
+//
+// The first message is sent using a SIGPROF signal delivery. The subsequent
+// three are sent using sem_wait/sem_post pairs. They are named accordingly
+// in the following struct.
+struct SigHandlerCoordinator {
+ SigHandlerCoordinator() {
+ PodZero(&mUContext);
+ int r = sem_init(&mMessage2, /* pshared */ 0, 0);  // Initial value 0.
+ r |= sem_init(&mMessage3, /* pshared */ 0, 0);
+ r |= sem_init(&mMessage4, /* pshared */ 0, 0);
+ MOZ_ASSERT(r == 0);
+ (void)r;  // Keeps |r| "used" when MOZ_ASSERT compiles to nothing.
+ }
+
+ ~SigHandlerCoordinator() {
+ int r = sem_destroy(&mMessage2);
+ r |= sem_destroy(&mMessage3);
+ r |= sem_destroy(&mMessage4);
+ MOZ_ASSERT(r == 0);
+ (void)r;  // Keeps |r| "used" when MOZ_ASSERT compiles to nothing.
+ }
+
+ sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator"
+ sem_t mMessage3; // To samplee: "resume"
+ sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator"
+ ucontext_t mUContext; // Context at signal
+};
+
+struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
+
+static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
+ // Avoid TSan warning about clobbering errno.
+ int savedErrno = errno;  // Restored just before returning.
+
+ MOZ_ASSERT(aSignal == SIGPROF);
+ MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
+
+ // By sending us this signal, the sampler thread has sent us message 1 in
+ // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
+ // for use, please copy your register context into it."
+ Sampler::sSigHandlerCoordinator->mUContext =
+ *static_cast<ucontext_t*>(aContext);  // Copies the whole ucontext_t.
+
+ // Send message 2: tell the sampler thread that the context has been copied
+ // into |sSigHandlerCoordinator->mUContext|. sem_post can never fail by
+ // being interrupted by a signal, so there's no loop around this call.
+ int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
+ MOZ_ASSERT(r == 0);
+
+ // At this point, the sampler thread assumes we are suspended, so we must
+ // not touch any global state here.
+
+ // Wait for message 3: the sampler thread tells us to resume.
+ while (true) {
+ r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
+ if (r == -1 && errno == EINTR) {
+ // Interrupted by a signal. Try again.
+ continue;
+ }
+ // We don't expect any other kind of failure
+ MOZ_ASSERT(r == 0);
+ break;
+ }
+
+ // Send message 4: tell the sampler thread that we are finished accessing
+ // |sSigHandlerCoordinator|. After this point it is not safe to touch
+ // |sSigHandlerCoordinator|.
+ r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
+ MOZ_ASSERT(r == 0);
+
+ errno = savedErrno;  // Undo any errno change made by the sem_* calls above.
+}
+
+Sampler::Sampler(PSLockRef aLock) : mMyPid(profiler_current_process_id()) {
+#if defined(USE_EHABI_STACKWALK)
+  EHABIStackWalkInit();
+#endif
+
+  // NOTE: LUL is deliberately not initialized here; SamplerThread's
+  // constructor does that instead. The profiler_suspend_and_sample_thread
+  // entry point must be able to sample without waiting for LUL to be
+  // initialized.
+
+  // Install SigprofHandler for SIGPROF, remembering the previous disposition.
+  struct sigaction action;
+  sigemptyset(&action.sa_mask);
+  action.sa_flags = SA_SIGINFO | SA_RESTART;
+  action.sa_sigaction = SigprofHandler;
+  if (0 != sigaction(SIGPROF, &action, &mOldSigprofHandler)) {
+    MOZ_CRASH("Error installing SIGPROF handler in the profiler");
+  }
+}
+
+void Sampler::Disable(PSLockRef aLock) {
+  // The SIGPROF disposition is global state, so put the saved handler back
+  // right now, while gPSMutex is locked.
+  sigaction(SIGPROF, &mOldSigprofHandler, nullptr);
+}
+
+template <typename Func>  // Func is invoked as aProcessRegs(regs, aNow).
+void Sampler::SuspendAndSampleAndResumeThread(
+ PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+ const TimeStamp& aNow, const Func& aProcessRegs) {
+ // Only one sampler thread can be sampling at once. So we expect to have
+ // complete control over |sSigHandlerCoordinator|.
+ MOZ_ASSERT(!sSigHandlerCoordinator);
+
+ if (!mSamplerTid.IsSpecified()) {
+ mSamplerTid = profiler_current_thread_id();
+ }
+ BaseProfilerThreadId sampleeTid = aRegisteredThread.Info()->ThreadId();
+ MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
+
+ //----------------------------------------------------------------//
+ // Suspend the samplee thread and get its context.
+
+ SigHandlerCoordinator coord; // on sampler thread's stack
+ sSigHandlerCoordinator = &coord;
+
+ // Send message 1 to the samplee (the thread to be sampled), by
+ // signalling at it.
+ // This could fail if the thread doesn't exist anymore.
+ int r = tgkill(mMyPid.ToNumber(), sampleeTid.ToNumber(), SIGPROF);
+ if (r == 0) {
+ // Wait for message 2 from the samplee, indicating that the context
+ // is available and that the thread is suspended.
+ while (true) {
+ r = sem_wait(&sSigHandlerCoordinator->mMessage2);
+ if (r == -1 && errno == EINTR) {
+ // Interrupted by a signal. Try again.
+ continue;
+ }
+ // We don't expect any other kind of failure.
+ MOZ_ASSERT(r == 0);
+ break;
+ }
+
+ //----------------------------------------------------------------//
+ // Sample the target thread.
+
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ //
+ // The profiler's "critical section" begins here. In the critical section,
+ // we must not do any dynamic memory allocation, nor try to acquire any lock
+ // or any other unshareable resource. This is because the thread to be
+ // sampled has been suspended at some entirely arbitrary point, and we have
+ // no idea which unshareable resources (locks, essentially) it holds. So any
+ // attempt to acquire any lock, including the implied locks used by the
+ // malloc implementation, risks deadlock. This includes TimeStamp::Now(),
+ // which gets a lock on Windows.
+
+ // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
+ // valid. We can poke around in it and unwind its stack as we like.
+
+ // Extract the current register values.
+ Registers regs;
+ PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
+ aProcessRegs(regs, aNow);
+
+ //----------------------------------------------------------------//
+ // Resume the target thread.
+
+ // Send message 3 to the samplee, which tells it to resume.
+ r = sem_post(&sSigHandlerCoordinator->mMessage3);
+ MOZ_ASSERT(r == 0);
+
+ // Wait for message 4 from the samplee, which tells us that it has
+ // finished with |sSigHandlerCoordinator|.
+ while (true) {
+ r = sem_wait(&sSigHandlerCoordinator->mMessage4);
+ if (r == -1 && errno == EINTR) {
+ continue;  // Interrupted by a signal. Try again.
+ }
+ MOZ_ASSERT(r == 0);
+ break;
+ }
+
+ // The profiler's critical section ends here. After this point, none of the
+ // critical section limitations documented above apply.
+ //
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ }
+
+ // This isn't strictly necessary, but doing so does help pick up anomalies
+ // in which the signal handler is running when it shouldn't be.
+ sSigHandlerCoordinator = nullptr;
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+static void* ThreadEntry(void* aArg) {
+  // pthread start routine: aArg is the SamplerThread given to pthread_create.
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+ double aIntervalMilliseconds, uint32_t aFeatures)
+ : mSampler(aLock),
+ mActivityGeneration(aActivityGeneration),
+ mIntervalMicroseconds(  // ms -> us, rounded to nearest, never below 1.
+ std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
+#if defined(USE_LUL_STACKWALK)
+ lul::LUL* lul = CorePS::Lul(aLock);
+ if (!lul && ProfilerFeature::HasStackWalkEnabled(aFeatures)) {
+ CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
+ // Read all the unwind info currently available.
+ lul = CorePS::Lul(aLock);
+ read_procmaps(lul);
+
+ // Switch into unwind mode. After this point, we can't add or remove any
+ // unwind info to/from this LUL instance. The only thing we can do with
+ // it is Unwind() calls.
+ lul->EnableUnwinding();
+
+ // Has a test been requested?
+ if (getenv("MOZ_PROFILER_LUL_TEST")) {
+ int nTests = 0, nTestsPassed = 0;  // Counts are not inspected afterwards.
+ RunLulUnitTests(&nTests, &nTestsPassed, lul);
+ }
+ }
+#endif
+
+ // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
+ // the signal ourselves instead of relying on itimer provides much better
+ // accuracy.
+ if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
+ MOZ_CRASH("pthread_create failed");
+ }
+}
+
+SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
+
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  // Sleep for (at least) aMicroseconds, resuming after signal interruptions.
+  //
+  // nanosleep() is used for every duration. Whole seconds go in tv_sec
+  // because tv_nsec must stay below one second; usleep() is avoided because
+  // POSIX allows it to reject arguments >= 1000000 and it gives no way to
+  // resume the remaining time after EINTR.
+  constexpr uint32_t kMicrosPerSecond = 1000000;
+  struct timespec ts;
+  ts.tv_sec = aMicroseconds / kMicrosPerSecond;
+  ts.tv_nsec = (aMicroseconds % kMicrosPerSecond) * 1000UL;
+
+  int rv = ::nanosleep(&ts, &ts);
+
+  while (rv != 0 && errno == EINTR) {
+    // Keep waiting in case of interrupt.
+    // nanosleep puts the remaining time back into ts.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
+}
+
+void SamplerThread::Stop(PSLockRef aLock) {
+ // Restore old signal handler. This is global state so it's important that
+ // we do it now, while gPSMutex is locked. It's safe to do this now even
+ // though this SamplerThread is still alive, because the next time the main
+ // loop of Run() iterates it won't get past the mActivityGeneration check,
+ // and so won't send any signals.
+ mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+
+// We use pthread_atfork() to temporarily disable signal delivery during any
+// fork() call. Without that, fork() can be repeatedly interrupted by signal
+// delivery, requiring it to be repeatedly restarted, which can lead to *long*
+// delays. See bug 837390.
+//
+// We provide no paf_child() function to run in the child after forking. This
+// is fine because we always immediately exec() after fork(), and exec()
+// clobbers all process state. Also, we don't want the sampler to resume in the
+// child process between fork() and exec(), it would be wasteful.
+//
+// Unfortunately all this is only doable on non-Android because Bionic doesn't
+// have pthread_atfork.
+
+// In the parent, before the fork, increase gSkipSampling to ensure that
+// profiler sampling loops will be skipped. There could be one in progress now,
+// causing a small delay, but further sampling will be skipped, allowing `fork`
+// to complete.
+static void paf_prepare() { ++gSkipSampling; }
+
+// In the parent, after the fork, decrease gSkipSampling to let the sampler
+// resume sampling (unless other places have made it non-zero as well).
+static void paf_parent() { --gSkipSampling; }
+
+static void PlatformInit(PSLockRef aLock) {
+ // Set up the fork handlers.
+ pthread_atfork(paf_prepare, paf_parent, nullptr);
+}
+
+#else
+
+static void PlatformInit(PSLockRef aLock) {}
+
+#endif
+
+#if defined(HAVE_NATIVE_UNWIND)
+// TODO port getcontext from breakpad, if profiler_get_backtrace is needed.
+# define REGISTERS_SYNC_POPULATE(regs) \
+ MOZ_CRASH("profiler_get_backtrace() unsupported");
+#endif
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform-macos.cpp b/mozglue/baseprofiler/core/platform-macos.cpp
new file mode 100644
index 0000000000..87ce3eedeb
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-macos.cpp
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <mach/mach_init.h>
+#include <mach-o/getsect.h>
+
+#include <AvailabilityMacros.h>
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <libkern/OSAtomic.h>
+#include <mach/mach.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/thread_act.h>
+#include <mach/vm_statistics.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+
+// this port is based off of v8 svn revision 9837
+
+namespace mozilla {
+namespace baseprofiler {
+
+static int64_t MicrosecondsSince1970() {
+  struct timeval now;  // Filled in as whole seconds plus microseconds.
+  gettimeofday(&now, nullptr);
+  return int64_t(now.tv_usec) + int64_t(now.tv_sec) * 1000000;
+}
+
+void* GetStackTop(void* aGuess) {
+  // aGuess is ignored: ask pthreads for the calling thread's stack address.
+  return pthread_get_stackaddr_np(pthread_self());
+}
+
+class PlatformData {
+ public:
+ explicit PlatformData(BaseProfilerThreadId aThreadId)  // aThreadId is unused.
+ : mProfiledThread(mach_thread_self()) {}  // Port of the constructing thread.
+
+ ~PlatformData() {
+ // Deallocate Mach port for thread.
+ mach_port_deallocate(mach_task_self(), mProfiledThread);
+ }
+
+ thread_act_t ProfiledThread() { return mProfiledThread; }
+
+ private:
+ // Note: for mProfiledThread Mach primitives are used instead of pthread's
+ // because the latter doesn't provide thread manipulation primitives required.
+ // For details, consult "Mac OS X Internals" book, Section 7.3.
+ thread_act_t mProfiledThread;
+};
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+Sampler::Sampler(PSLockRef aLock) {}
+
+void Sampler::Disable(PSLockRef aLock) {}
+
+template <typename Func>  // Func is invoked as aProcessRegs(regs, aNow).
+void Sampler::SuspendAndSampleAndResumeThread(
+ PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+ const TimeStamp& aNow, const Func& aProcessRegs) {
+ thread_act_t samplee_thread =
+ aRegisteredThread.GetPlatformData()->ProfiledThread();
+
+ //----------------------------------------------------------------//
+ // Suspend the samplee thread and get its context.
+
+ // We're using thread_suspend on OS X because pthread_kill (which is what we
+ // at one time used on Linux) has less consistent performance and causes
+ // strange crashes, see bug 1166778 and bug 1166808. thread_suspend
+ // is also just a lot simpler to use.
+
+ if (KERN_SUCCESS != thread_suspend(samplee_thread)) {
+ return;  // Could not suspend the thread: no sample is taken.
+ }
+
+ //----------------------------------------------------------------//
+ // Sample the target thread.
+
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ //
+ // The profiler's "critical section" begins here. We must be very careful
+ // what we do here, or risk deadlock. See the corresponding comment in
+ // platform-linux-android.cpp for details.
+
+#if defined(__x86_64__)
+ thread_state_flavor_t flavor = x86_THREAD_STATE64;
+ x86_thread_state64_t state;
+ mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+# if __DARWIN_UNIX03
+# define REGISTER_FIELD(name) __r##name
+# else
+# define REGISTER_FIELD(name) r##name
+# endif // __DARWIN_UNIX03
+#elif defined(__aarch64__)
+ thread_state_flavor_t flavor = ARM_THREAD_STATE64;
+ arm_thread_state64_t state;
+ mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
+# if __DARWIN_UNIX03
+# define REGISTER_FIELD(name) __##name
+# else
+# define REGISTER_FIELD(name) name
+# endif // __DARWIN_UNIX03
+#else
+# error "unknown architecture"
+#endif
+
+ if (thread_get_state(samplee_thread, flavor,
+ reinterpret_cast<natural_t*>(&state),
+ &count) == KERN_SUCCESS) {
+ Registers regs;
+#if defined(__x86_64__)
+ regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
+ regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+ regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
+ regs.mR10 = reinterpret_cast<Address>(state.REGISTER_FIELD(10));
+ regs.mR12 = reinterpret_cast<Address>(state.REGISTER_FIELD(12));
+#elif defined(__aarch64__)
+ regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(pc));
+ regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+ regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(fp));
+ regs.mLR = reinterpret_cast<Address>(state.REGISTER_FIELD(lr));
+ regs.mR11 = reinterpret_cast<Address>(state.REGISTER_FIELD(x[11]));
+#else
+# error "unknown architecture"
+#endif
+
+ aProcessRegs(regs, aNow);
+ }
+
+#undef REGISTER_FIELD
+
+ //----------------------------------------------------------------//
+ // Resume the target thread.
+
+ thread_resume(samplee_thread);  // Runs even if thread_get_state failed.
+
+ // The profiler's critical section ends here.
+ //
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+static void* ThreadEntry(void* aArg) {
+  // pthread start routine: aArg is the SamplerThread given to pthread_create.
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds, uint32_t aFeatures)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
+      mThread{nullptr} {
+  // Spawn the sampler thread with default attributes; failure is fatal.
+  if (0 != pthread_create(&mThread, nullptr, ThreadEntry, this)) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
+
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+ usleep(aMicroseconds);
+ // FIXME: the OSX 10.12 page for usleep says "The usleep() function is
+ // obsolescent. Use nanosleep(2) instead." This implementation could be
+ // merged with the linux-android version. Also, this doesn't handle the
+ // case where the usleep call is interrupted by a signal.
+}
+
+void SamplerThread::Stop(PSLockRef aLock) { mSampler.Disable(aLock); }
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+static void PlatformInit(PSLockRef aLock) {}
+
+// clang-format off
+#if defined(HAVE_NATIVE_UNWIND)
+// Derive the stack pointer from the frame pointer. The 0x10 offset is
+// 8 bytes for the previous frame pointer and 8 bytes for the return
+// address, both stored on the stack at the beginning of the current
+// frame.
+# define REGISTERS_SYNC_POPULATE(regs) \
+ regs.mSP = reinterpret_cast<Address>(__builtin_frame_address(0)) + 0x10; \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wframe-address\"") \
+ regs.mFP = reinterpret_cast<Address>(__builtin_frame_address(1)); \
+ _Pragma("GCC diagnostic pop") \
+ regs.mPC = reinterpret_cast<Address>( \
+ __builtin_extract_return_addr(__builtin_return_address(0)));
+#endif
+// clang-format on
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform-win32.cpp b/mozglue/baseprofiler/core/platform-win32.cpp
new file mode 100644
index 0000000000..d2ddf1a590
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-win32.cpp
@@ -0,0 +1,297 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this
+// software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <process.h>
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Returns the current system time expressed in microseconds since
+// 1970-01-01 (the Unix epoch).
+static int64_t MicrosecondsSince1970() {
+  SYSTEMTIME systemTime;
+  GetSystemTime(&systemTime);
+
+  FILETIME fileTime;
+  SystemTimeToFileTime(&systemTime, &fileTime);
+
+  // FILETIME holds a 64-bit count of 100-nanosecond ticks; copy it into an
+  // integer so arithmetic can be done on it.
+  int64_t ticks;
+  static_assert(sizeof(fileTime) == sizeof(ticks),
+                "Expect FILETIME to be 64 bits");
+  memcpy(&ticks, &fileTime, sizeof(ticks));
+
+  // Number of 100-nanosecond ticks between the FILETIME epoch and 1970.
+  const int64_t epochBias = 116444736000000000LL;
+
+  // Ten ticks make one microsecond.
+  return (ticks - epochBias) / 10;
+}
+
+// Returns the stack base recorded in the calling thread's information block.
+// aGuess is not used by this implementation.
+void* GetStackTop(void* aGuess) {
+  PNT_TIB threadInfoBlock = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
+  void* stackBase = reinterpret_cast<void*>(threadInfoBlock->StackBase);
+  return stackBase;
+}
+
+// Copies the program counter, stack pointer and frame pointer, plus two
+// architecture-specific registers, from a Win32 CONTEXT into aRegs.
+// Compilation fails with "bad arch" on any architecture other than amd64,
+// x86 and arm64.
+static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
+#if defined(GP_ARCH_amd64)
+ aRegs.mPC = reinterpret_cast<Address>(aContext->Rip);
+ aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp);
+ aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp);
+ aRegs.mR10 = reinterpret_cast<Address>(aContext->R10);
+ aRegs.mR12 = reinterpret_cast<Address>(aContext->R12);
+#elif defined(GP_ARCH_x86)
+ aRegs.mPC = reinterpret_cast<Address>(aContext->Eip);
+ aRegs.mSP = reinterpret_cast<Address>(aContext->Esp);
+ aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp);
+ aRegs.mEcx = reinterpret_cast<Address>(aContext->Ecx);
+ aRegs.mEdx = reinterpret_cast<Address>(aContext->Edx);
+#elif defined(GP_ARCH_arm64)
+ aRegs.mPC = reinterpret_cast<Address>(aContext->Pc);
+ aRegs.mSP = reinterpret_cast<Address>(aContext->Sp);
+ aRegs.mFP = reinterpret_cast<Address>(aContext->Fp);
+ aRegs.mLR = reinterpret_cast<Address>(aContext->Lr);
+ aRegs.mR11 = reinterpret_cast<Address>(aContext->X11);
+#else
+# error "bad arch"
+#endif
+}
+
+// Duplicates the pseudo handle returned by GetCurrentThread() into a real
+// handle for the calling thread, requesting the access rights the profiler
+// needs (get context, suspend/resume, query information).
+// @return a real HANDLE for the current thread, or nullptr if the
+//         duplication failed.
+static HANDLE GetRealCurrentThreadHandleForProfiling() {
+  const DWORD desiredAccess =
+      THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION;
+
+  HANDLE duplicatedHandle;
+  const BOOL succeeded = ::DuplicateHandle(
+      ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(),
+      &duplicatedHandle, desiredAccess, FALSE, 0);
+
+  return succeeded ? duplicatedHandle : nullptr;
+}
+
+// Owns a real handle to the thread being profiled. The handle is obtained in
+// the constructor (which must run on the profiled thread) and closed in the
+// destructor.
+class PlatformData {
+ public:
+  // Get a handle to the calling thread. This is the thread that we are
+  // going to profile. We need a real handle because we are going to use it in
+  // the sampler thread.
+  explicit PlatformData(BaseProfilerThreadId aThreadId)
+      : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
+    MOZ_ASSERT(DWORD(aThreadId.ToNumber()) == ::GetCurrentThreadId());
+  }
+
+  // This object is the sole owner of mProfiledThread: a copy would make two
+  // destructors call CloseHandle() on the same handle, so copying is
+  // disallowed.
+  PlatformData(const PlatformData&) = delete;
+  PlatformData& operator=(const PlatformData&) = delete;
+
+  ~PlatformData() {
+    if (mProfiledThread != nullptr) {
+      CloseHandle(mProfiledThread);
+      mProfiledThread = nullptr;
+    }
+  }
+
+  // Returns the owned thread handle; nullptr if it could not be obtained.
+  HANDLE ProfiledThread() { return mProfiledThread; }
+
+ private:
+  HANDLE mProfiledThread;
+};
+
+#if defined(USE_MOZ_STACK_WALK)
+// Returns the profiled thread's handle stored in aData. Only compiled when
+// USE_MOZ_STACK_WALK is defined.
+HANDLE
+GetThreadHandle(PlatformData* aData) { return aData->ProfiledThread(); }
+#endif
+
+// Sentinel handle value that ~SamplerThread() compares mThread against
+// before closing it.
+static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// Nothing to set up in this implementation; the body is empty.
+Sampler::Sampler(PSLockRef aLock) {}
+
+// Nothing to tear down in this implementation; the body is empty.
+void Sampler::Disable(PSLockRef aLock) {}
+
+// Suspends the profiled thread of aRegisteredThread, captures its register
+// context, calls aProcessRegs(regs, aNow) while the thread is still
+// suspended, and then resumes it.
+//
+// Returns without calling aProcessRegs if the thread handle is null, if
+// SuspendThread() fails, or if GetThreadContext() fails (in that last case
+// the thread is resumed before returning).
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+ PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+ const TimeStamp& aNow, const Func& aProcessRegs) {
+ HANDLE profiled_thread =
+ aRegisteredThread.GetPlatformData()->ProfiledThread();
+ if (profiled_thread == nullptr) {
+ return;
+ }
+
+ // Context used for sampling the register state of the profiled thread.
+ CONTEXT context;
+ memset(&context, 0, sizeof(context));
+
+ //----------------------------------------------------------------//
+ // Suspend the samplee thread and get its context.
+
+ static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
+ if (SuspendThread(profiled_thread) == kSuspendFailed) {
+ return;
+ }
+
+ // SuspendThread is asynchronous, so the thread may still be running.
+ // Call GetThreadContext first to ensure the thread is really suspended.
+ // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+ // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
+ // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
+#if defined(GP_ARCH_amd64)
+ context.ContextFlags = CONTEXT_FULL;
+#else
+ context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+#endif
+ if (!GetThreadContext(profiled_thread, &context)) {
+ // The thread was suspended above, so it must be resumed before bailing.
+ ResumeThread(profiled_thread);
+ return;
+ }
+
+ //----------------------------------------------------------------//
+ // Sample the target thread.
+
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ //
+ // The profiler's "critical section" begins here. We must be very careful
+ // what we do here, or risk deadlock. See the corresponding comment in
+ // platform-linux-android.cpp for details.
+
+ Registers regs;
+ PopulateRegsFromContext(regs, &context);
+ aProcessRegs(regs, aNow);
+
+ //----------------------------------------------------------------//
+ // Resume the target thread.
+
+ ResumeThread(profiled_thread);
+
+ // The profiler's critical section ends here.
+ //
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// Start routine handed to _beginthreadex(): aArg is the SamplerThread whose
+// Run() method is executed on the new thread. Always returns 0.
+static unsigned int __stdcall ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return 0;
+}
+
+// Records the sampling parameters (the interval is rounded to whole
+// microseconds, with a minimum of 1), optionally raises the system timer
+// resolution, and starts the thread that runs ThreadEntry(). Crashes if the
+// thread cannot be created.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds, uint32_t aFeatures)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
+      mNoTimerResolutionChange(
+          ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) {
+  const bool adjustTimerResolution =
+      !mNoTimerResolutionChange && mIntervalMicroseconds < 10 * 1000;
+  if (adjustTimerResolution) {
+    // By default the timer resolution (which tends to be 64Hz, i.e. a period
+    // of around 16ms) is not changed. However, if the requested interval is
+    // sufficiently low, the resolution will be adjusted to match. Note that
+    // this affects all timers in Firefox, and could therefore hide issues
+    // while profiling. This change may be prevented with the
+    // "notimerresolutionchange" feature.
+    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  // Create a new thread. It is important to use _beginthreadex() instead of
+  // the Win32 function CreateThread(), because the CreateThread() does not
+  // initialize thread-specific structures in the C runtime library.
+  const auto rawThread = _beginthreadex(nullptr,
+                                        /* stack_size */ 0, ThreadEntry, this,
+                                        /* initflag */ 0, nullptr);
+  mThread = reinterpret_cast<HANDLE>(rawThread);
+  if (!mThread) {
+    MOZ_CRASH("_beginthreadex failed");
+  }
+}
+
+// Waits (without timeout) for the sampler thread to finish, then closes the
+// handle obtained from _beginthreadex() in the constructor.
+SamplerThread::~SamplerThread() {
+ WaitForSingleObject(mThread, INFINITE);
+
+ // Close our own handle for the thread.
+ // NOTE(review): kNoThread is INVALID_HANDLE_VALUE, whereas the constructor
+ // crashes when _beginthreadex() returns 0, so this check looks always true
+ // here -- confirm whether it is still needed.
+ if (mThread != kNoThread) {
+ CloseHandle(mThread);
+ }
+}
+
+// Sleeps for about aMicroseconds. With a profiler interval of 1ms or more
+// this is a plain ::Sleep() of at least 1ms; with a sub-millisecond interval
+// it sleeps for the whole milliseconds and busy-waits for the remainder.
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  const uint32_t wholeMilliseconds = aMicroseconds / 1000;
+
+  // For now, keep the old behaviour of minimum Sleep(1), even for
+  // smaller-than-usual sleeps after an overshoot, unless the user has
+  // explicitly opted into a sub-millisecond profiler interval.
+  if (mIntervalMicroseconds >= 1000) {
+    ::Sleep(std::max(1u, wholeMilliseconds));
+    return;
+  }
+
+  // The deadline is computed before any sleeping happens.
+  const TimeStamp startTime = TimeStamp::Now();
+  const TimeStamp deadline =
+      startTime + TimeDuration::FromMicroseconds(aMicroseconds);
+
+  // First, sleep for as many whole milliseconds as possible.
+  if (wholeMilliseconds > 0) {
+    ::Sleep(wholeMilliseconds);
+  }
+
+  // Then, spin until enough time has passed.
+  while (TimeStamp::Now() < deadline) {
+    YieldProcessor();
+  }
+}
+
+// Undoes the timer resolution change made by the constructor (under the same
+// condition the constructor used to make it), then disables the sampler.
+void SamplerThread::Stop(PSLockRef aLock) {
+  const bool timerResolutionWasAdjusted =
+      !mNoTimerResolutionChange && mIntervalMicroseconds < 10 * 1000;
+  if (timerResolutionWasAdjusted) {
+    // Disable any timer resolution changes we've made. Do it now while
+    // gPSMutex is locked, i.e. before any other SamplerThread can be created
+    // and call ::timeBeginPeriod().
+    //
+    // It's safe to do this now even though this SamplerThread is still alive,
+    // because the next time the main loop of Run() iterates it won't get past
+    // the mActivityGeneration check, and so it won't make any more ::Sleep()
+    // calls.
+    ::timeEndPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+// Platform-specific initialization hook; the body is empty, so nothing is
+// done in this implementation.
+static void PlatformInit(PSLockRef aLock) {}
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Fills `regs` from the calling thread's own context, captured with
+// RtlCaptureContext(). Note that it declares a local named `context` in the
+// scope where it is expanded.
+# define REGISTERS_SYNC_POPULATE(regs) \
+ CONTEXT context; \
+ RtlCaptureContext(&context); \
+ PopulateRegsFromContext(regs, &context);
+#endif
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp
new file mode 100644
index 0000000000..4f69aadd4a
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform.cpp
@@ -0,0 +1,3830 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// There are three kinds of samples done by the profiler.
+//
+// - A "periodic" sample is the most complex kind. It is done in response to a
+// timer while the profiler is active. It involves writing a stack trace plus
+// a variety of other values (memory measurements, responsiveness
+// measurements, etc.) into the main ProfileBuffer. The sampling is done from
+// off-thread, and so SuspendAndSampleAndResumeThread() is used to get the
+// register values.
+//
+// - A "synchronous" sample is a simpler kind. It is done in response to an API
+// call (profiler_get_backtrace()). It involves writing a stack trace and
+// little else into a temporary ProfileBuffer, and wrapping that up in a
+// ProfilerBacktrace that can be subsequently used in a marker. The sampling
+// is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the
+// register values.
+//
+// - A "backtrace" sample is the simplest kind. It is done in response to an
+// API call (profiler_suspend_and_sample_thread()). It involves getting a
+// stack trace via a ProfilerStackCollector; it does not write to a
+// ProfileBuffer. The sampling is done from off-thread, and so uses
+// SuspendAndSampleAndResumeThread() to get the register values.
+
+#include "platform.h"
+
+#include <algorithm>
+#include <errno.h>
+#include <fstream>
+#include <ostream>
+#include <set>
+#include <sstream>
+#include <string_view>
+
+// #include "memory_hooks.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/AutoProfilerLabel.h"
+#include "mozilla/BaseAndGeckoProfilerDetail.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/DoubleConversion.h"
+#include "mozilla/Printf.h"
+#include "mozilla/ProfilerBufferSize.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+#include "mozilla/Services.h"
+#include "mozilla/Span.h"
+#include "mozilla/StackWalk.h"
+#ifdef XP_WIN
+# include "mozilla/StackWalkThread.h"
+# include "mozilla/WindowsStackWalkInitialization.h"
+#endif
+#include "mozilla/StaticPtr.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/TimeStamp.h"
+
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "prdtoa.h"
+#include "prtime.h"
+
+#include "BaseProfiler.h"
+#include "BaseProfilingCategory.h"
+#include "PageInformation.h"
+#include "ProfiledThreadData.h"
+#include "ProfilerBacktrace.h"
+#include "ProfileBuffer.h"
+#include "RegisteredThread.h"
+#include "BaseProfilerSharedLibraries.h"
+#include "ThreadInfo.h"
+#include "VTuneProfiler.h"
+
+// Win32 builds always have frame pointers, so FramePointerStackWalk() always
+// works.
+#if defined(GP_PLAT_x86_windows)
+# define HAVE_NATIVE_UNWIND
+# define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// Win64 builds always omit frame pointers, so we use the slower
+// MozStackWalk(), which works in that case.
+#if defined(GP_PLAT_amd64_windows)
+# define HAVE_NATIVE_UNWIND
+# define USE_MOZ_STACK_WALK
+#endif
+
+// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
+// MozStackWalk().
+#if defined(GP_PLAT_arm64_windows)
+# define HAVE_NATIVE_UNWIND
+# define USE_MOZ_STACK_WALK
+#endif
+
+// Mac builds use FramePointerStackWalk(). Even if we build without
+// frame pointers, we'll still get useful stacks in system libraries
+// because those always have frame pointers.
+// We don't use MozStackWalk() on Mac.
+#if defined(GP_OS_darwin)
+# define HAVE_NATIVE_UNWIND
+# define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// No stack-walking in baseprofiler on linux, android, bsd.
+// APIs now make it easier to capture backtraces from the Base Profiler, which
+// is currently not supported on these platforms, and would lead to a
+// MOZ_CRASH in REGISTERS_SYNC_POPULATE(). `#if 0` added in bug 1658232,
+// follow-up bugs should be referenced in meta bug 1557568.
+#if 0
+// Android builds use the ARM Exception Handling ABI to unwind.
+# if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+# define HAVE_NATIVE_UNWIND
+# define USE_EHABI_STACKWALK
+# include "EHABIStackWalk.h"
+# endif
+
+// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
+# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
+ defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
+ defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \
+ defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
+ defined(GP_PLAT_arm64_freebsd)
+# define HAVE_NATIVE_UNWIND
+# define USE_LUL_STACKWALK
+# include "lul/LulMain.h"
+# include "lul/platform-linux-lul.h"
+
+// On linux we use LUL for periodic samples and synchronous samples, but we use
+// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
+// (See the comment at the top of the file for a definition of
+// periodic/synchronous/backtrace.).
+//
+// FramePointerStackWalk can produce incomplete stacks when the current entry is
+// in a shared library without framepointers, however LUL can take a long time
+// to initialize, which is undesirable for consumers of
+// profiler_suspend_and_sample_thread like the Background Hang Reporter.
+# if defined(MOZ_PROFILING)
+# define USE_FRAME_POINTER_STACK_WALK
+# endif
+# endif
+#endif
+
+// We can only stackwalk without expensive initialization on platforms which
+// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
+// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
+// which can be expensive.
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+# define HAVE_FASTINIT_NATIVE_UNWIND
+#endif
+
+#ifdef MOZ_VALGRIND
+# include <valgrind/memcheck.h>
+#else
+# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+# include <ucontext.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+using detail::RacyFeatures;
+
+// Returns true if messages of level aLevelToTest should be logged. The
+// maximum enabled level is read from the environment once, on first call:
+// 5 if MOZ_BASE_PROFILER_VERBOSE_LOGGING is set, else 4 if
+// MOZ_BASE_PROFILER_DEBUG_LOGGING is set, else 3 if MOZ_BASE_PROFILER_LOGGING
+// is set, else 0.
+bool LogTest(int aLevelToTest) {
+  static const int sMaxLevel = []() {
+    if (getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING")) {
+      return 5;
+    }
+    if (getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING")) {
+      return 4;
+    }
+    if (getenv("MOZ_BASE_PROFILER_LOGGING")) {
+      return 3;
+    }
+    return 0;
+  }();
+  return aLevelToTest <= sMaxLevel;
+}
+
+// Formats the printf-style arguments and writes them to the Android log
+// (INFO priority, tag "Gecko") when ANDROID is defined, or to stderr
+// otherwise.
+void PrintToConsole(const char* aFmt, ...) {
+ va_list args;
+ va_start(args, aFmt);
+#if defined(ANDROID)
+ __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, args);
+#else
+ vfprintf(stderr, aFmt, args);
+#endif
+ va_end(args);
+}
+
+// Returns the single core ProfileChunkedBuffer, a function-local static that
+// is constructed on first call with its own mutex.
+ProfileChunkedBuffer& profiler_get_core_buffer() {
+ // This needs its own mutex, because it is used concurrently from functions
+ // guarded by gPSMutex as well as others without safety (e.g.,
+ // profiler_add_marker). It is *not* used inside the critical section of the
+ // sampler, because mutexes cannot be used there.
+ static ProfileChunkedBuffer sProfileChunkedBuffer{
+ ProfileChunkedBuffer::ThreadSafety::WithMutex};
+ return sProfileChunkedBuffer;
+}
+
+// Integer accessed atomically with relaxed memory ordering.
+// NOTE(review): presumably a non-zero value asks the sampler to skip
+// sampling -- confirm against the code that reads it.
+Atomic<int, MemoryOrdering::Relaxed> gSkipSampling;
+
+// Compile-time check of the feature list: returns true only if the numbers
+// given to BASE_PROFILER_FOR_EACH_FEATURE entries are 0, 1, 2, ... in order.
+// The result is enforced by the static_assert that follows.
+constexpr static bool ValidateFeatures() {
+ int expectedFeatureNumber = 0;
+
+ // Feature numbers should start at 0 and increase by 1 each.
+#define CHECK_FEATURE(n_, str_, Name_, desc_) \
+ if ((n_) != expectedFeatureNumber) { \
+ return false; \
+ } \
+ ++expectedFeatureNumber;
+
+ BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
+
+#undef CHECK_FEATURE
+
+ return true;
+}
+
+static_assert(ValidateFeatures(), "Feature list is invalid");
+
+// Return all features that are available on this platform, as a bitfield:
+// every feature in BASE_PROFILER_FOR_EACH_FEATURE is set first, then the
+// unsupported ones are cleared.
+static uint32_t AvailableFeatures() {
+ uint32_t features = 0;
+
+#define ADD_FEATURE(n_, str_, Name_, desc_) \
+ ProfilerFeature::Set##Name_(features);
+
+ // Add all the possible features.
+ BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
+
+#undef ADD_FEATURE
+
+ // Now remove features not supported on this platform/configuration.
+ // Java, JS and Screenshots are always cleared here.
+ ProfilerFeature::ClearJava(features);
+ ProfilerFeature::ClearJS(features);
+ ProfilerFeature::ClearScreenshots(features);
+#if !defined(HAVE_NATIVE_UNWIND)
+ ProfilerFeature::ClearStackWalk(features);
+#endif
+#if !defined(GP_OS_windows)
+ ProfilerFeature::ClearNoTimerResolutionChange(features);
+#endif
+
+ return features;
+}
+
+// Default features common to all contexts (even if not available), returned
+// as a bitfield: Java, JS, StackWalk, CPUUtilization and ProcessCPU.
+static constexpr uint32_t DefaultFeatures() {
+ return ProfilerFeature::Java | ProfilerFeature::JS |
+ ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
+ ProfilerFeature::ProcessCPU;
+}
+
+// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
+// available).
+static constexpr uint32_t StartupExtraDefaultFeatures() {
+ // Enable mainthreadio by default for startup profiles as startup is heavy on
+ // I/O operations, and main thread I/O is really important to see there.
+ // The IPCMessages feature is enabled alongside it.
+ return ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages;
+}
+
+// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS.
+// Use `PSAutoLock lock;` to take the lock until the end of the enclosing block.
+// External profilers may use this same lock for their own data, but as the lock
+// is non-recursive, *only* `f(PSLockRef, ...)` functions below should be
+// called, to avoid double-locking.
+class MOZ_RAII PSAutoLock {
+ public:
+ // Locks gPSMutex through mLock for the lifetime of this object.
+ PSAutoLock() : mLock(gPSMutex) {}
+
+ // Not copyable.
+ PSAutoLock(const PSAutoLock&) = delete;
+ void operator=(const PSAutoLock&) = delete;
+
+ // Forwards to gPSMutex.IsLockedOnCurrentThread().
+ [[nodiscard]] static bool IsLockedOnCurrentThread() {
+ return gPSMutex.IsLockedOnCurrentThread();
+ }
+
+ private:
+ // The one mutex shared by every PSAutoLock.
+ static detail::BaseProfilerMutex gPSMutex;
+ detail::BaseProfilerAutoLock mLock;
+};
+
+detail::BaseProfilerMutex PSAutoLock::gPSMutex{"Base Profiler mutex"};
+
+// A PSLockRef argument is the only way into CorePS's and ActivePS's fields:
+// functions that do not take one cannot access them.
+using PSLockRef = const PSAutoLock&;
+
+// Defines a static getter `name_(PSLockRef)` returning sInstance->m<name_>.
+#define PS_GET(type_, name_) \
+ static type_ name_(PSLockRef) { \
+ MOZ_ASSERT(sInstance); \
+ return sInstance->m##name_; \
+ }
+
+// Same as PS_GET, but the generated getter takes no PSLockRef argument.
+#define PS_GET_LOCKLESS(type_, name_) \
+ static type_ name_() { \
+ MOZ_ASSERT(sInstance); \
+ return sInstance->m##name_; \
+ }
+
+// Defines the PS_GET getter plus a static setter `Set<name_>(PSLockRef, v)`
+// that assigns to sInstance->m<name_>.
+#define PS_GET_AND_SET(type_, name_) \
+ PS_GET(type_, name_) \
+ static void Set##name_(PSLockRef, type_ a##name_) { \
+ MOZ_ASSERT(sInstance); \
+ sInstance->m##name_ = a##name_; \
+ }
+
+// All functions in this file can run on multiple threads unless they have an
+// NS_IsMainThread() assertion.
+
+// This class contains the profiler's core global state, i.e. that which is
+// valid even when the profiler is not active. Most profile operations can't do
+// anything useful when this class is not instantiated, so we release-assert
+// its non-nullness in all such operations.
+//
+// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
+// PSAutoLock reference as an argument as proof that the gPSMutex is currently
+// locked. This makes it clear when gPSMutex is locked and helps avoid
+// accidental unlocked accesses to global state. There are ways to circumvent
+// this mechanism, but please don't do so without *very* good reason and a
+// detailed explanation.
+//
+// The exceptions to this rule:
+//
+// - mProcessStartTime, because it's immutable;
+//
+// - each thread's RacyRegisteredThread object is accessible without locking via
+// TLSRegisteredThread::RacyRegisteredThread().
+class CorePS {
+ private:
+ CorePS()
+ : mProcessStartTime(TimeStamp::ProcessCreation())
+#ifdef USE_LUL_STACKWALK
+ ,
+ mLul(nullptr)
+#endif
+ {
+ }
+
+ ~CorePS() {}
+
+ public:
+ // Allocates the singleton; asserts that it does not exist yet.
+ static void Create(PSLockRef aLock) {
+ MOZ_ASSERT(!sInstance);
+ sInstance = new CorePS();
+ }
+
+ // Deletes the singleton and resets sInstance to null.
+ static void Destroy(PSLockRef aLock) {
+ MOZ_ASSERT(sInstance);
+ delete sInstance;
+ sInstance = nullptr;
+ }
+
+ // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
+ // being locked. This is because CorePS is instantiated so early on the main
+ // thread that we don't have to worry about it being racy.
+ static bool Exists() { return !!sInstance; }
+
+ // Adds the size of the singleton, its registered threads and its registered
+ // pages to aProfSize and, when LUL is in use, the size of LUL to aLulSize.
+ static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
+ size_t& aProfSize, size_t& aLulSize) {
+ MOZ_ASSERT(sInstance);
+
+ aProfSize += aMallocSizeOf(sInstance);
+
+ for (auto& registeredThread : sInstance->mRegisteredThreads) {
+ aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
+ }
+
+ for (auto& registeredPage : sInstance->mRegisteredPages) {
+ aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
+ }
+
+ // Measurement of the following things may be added later if DMD finds it
+ // is worthwhile:
+ // - CorePS::mRegisteredThreads itself (its elements' children are
+ // measured above)
+ // - CorePS::mRegisteredPages itself (its elements' children are
+ // measured above)
+ // - CorePS::mInterposeObserver
+
+#if defined(USE_LUL_STACKWALK)
+ if (sInstance->mLul) {
+ aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
+ }
+#endif
+ }
+
+ // No PSLockRef is needed for this field because it's immutable.
+ PS_GET_LOCKLESS(const TimeStamp&, ProcessStartTime)
+
+ PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)
+
+ // Takes ownership of aRegisteredThread; release-asserts that the append
+ // succeeded.
+ static void AppendRegisteredThread(
+ PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
+ MOZ_ASSERT(sInstance);
+ MOZ_RELEASE_ASSERT(
+ sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
+ }
+
+ // Does nothing if aRegisteredThread is not in mRegisteredThreads.
+ static void RemoveRegisteredThread(PSLockRef,
+ RegisteredThread* aRegisteredThread) {
+ MOZ_ASSERT(sInstance);
+ // Remove aRegisteredThread from mRegisteredThreads.
+ for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
+ if (rt.get() == aRegisteredThread) {
+ // Return right after erasing, so the loop never continues over the
+ // modified vector.
+ sInstance->mRegisteredThreads.erase(&rt);
+ return;
+ }
+ }
+ }
+
+ PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
+
+ // Appends aRegisteredPage unless a page that Equals() it is already
+ // registered; an already-registered equal page whose URL is "about:blank" is
+ // replaced instead.
+ static void AppendRegisteredPage(PSLockRef,
+ RefPtr<PageInformation>&& aRegisteredPage) {
+ MOZ_ASSERT(sInstance);
+ struct RegisteredPageComparator {
+ PageInformation* aA;
+ bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
+ };
+
+ auto foundPageIter = std::find_if(
+ sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
+ RegisteredPageComparator{aRegisteredPage.get()});
+
+ if (foundPageIter != sInstance->mRegisteredPages.end()) {
+ if ((*foundPageIter)->Url() == "about:blank") {
+ // When a BrowsingContext is loaded, the first url loaded in it will be
+ // about:blank, and if the principal matches, the first document loaded
+ // in it will share an inner window. That's why we should delete the
+ // intermediate about:blank if they share the inner window.
+ sInstance->mRegisteredPages.erase(foundPageIter);
+ } else {
+ // Do not register the same page again.
+ return;
+ }
+ }
+ MOZ_RELEASE_ASSERT(
+ sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
+ }
+
+ static void RemoveRegisteredPage(PSLockRef,
+ uint64_t aRegisteredInnerWindowID) {
+ MOZ_ASSERT(sInstance);
+ // Remove RegisteredPage from mRegisteredPages by given inner window ID.
+ sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
+ return rd->InnerWindowID() == aRegisteredInnerWindowID;
+ });
+ }
+
+ static void ClearRegisteredPages(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ sInstance->mRegisteredPages.clear();
+ }
+
+ PS_GET(const Vector<BaseProfilerCount*>&, Counters)
+
+ static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
+ MOZ_ASSERT(sInstance);
+ // We don't own the counters; they may be stored in static objects.
+ MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
+ }
+
+ static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
+ // We may be called to remove a counter after the profiler is stopped or
+ // late in shutdown, hence the null check instead of an assertion.
+ if (sInstance) {
+ auto* counter = std::find(sInstance->mCounters.begin(),
+ sInstance->mCounters.end(), aCounter);
+ MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
+ sInstance->mCounters.erase(counter);
+ }
+ }
+
+#ifdef USE_LUL_STACKWALK
+ static lul::LUL* Lul(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ return sInstance->mLul.get();
+ }
+ static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
+ MOZ_ASSERT(sInstance);
+ sInstance->mLul = std::move(aLul);
+ }
+#endif
+
+ PS_GET_AND_SET(const std::string&, ProcessName)
+ PS_GET_AND_SET(const std::string&, ETLDplus1)
+
+ private:
+ // The singleton instance
+ static CorePS* sInstance;
+
+ // The time that the process started.
+ const TimeStamp mProcessStartTime;
+
+ // Info on all the registered threads.
+ // ThreadIds in mRegisteredThreads are unique.
+ Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
+
+ // Info on all the registered pages.
+ // InnerWindowIDs in mRegisteredPages are unique.
+ Vector<RefPtr<PageInformation>> mRegisteredPages;
+
+ // Non-owning pointers to all active counters
+ Vector<BaseProfilerCount*> mCounters;
+
+#ifdef USE_LUL_STACKWALK
+ // LUL's state. Null prior to the first activation, non-null thereafter.
+ UniquePtr<lul::LUL> mLul;
+#endif
+
+ // Process name, provided by child process initialization code.
+ std::string mProcessName;
+ // Private name, provided by child process initialization code (eTLD+1 in
+ // fission)
+ std::string mETLDplus1;
+};
+
+CorePS* CorePS::sInstance = nullptr;
+
+class SamplerThread;
+
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+ double aInterval, uint32_t aFeatures);
+
+// Pairs a registered thread with the ProfiledThreadData held for it.
+struct LiveProfiledThreadData {
+ // Raw pointer; CorePS::mRegisteredThreads holds RegisteredThreads through
+ // UniquePtr, so this is presumably non-owning -- TODO confirm.
+ RegisteredThread* mRegisteredThread;
+ UniquePtr<ProfiledThreadData> mProfiledThreadData;
+};
+
+// The buffer size is provided as a number of "entries", this is their size in
+// bytes.
+constexpr static uint32_t scBytesPerEntry = 8;
+
+// This class contains the profiler's global state that is valid only when the
+// profiler is active. When not instantiated, the profiler is inactive.
+//
+// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
+// CorePS.
+//
+class ActivePS {
+ private:
+  // Chunk size in bytes for a buffer of aEntries entries: the clamped buffer
+  // size divided by the minimum number of chunks, capped at
+  // scMaximumChunkSize.
+  constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
+    const size_t bufferBytes =
+        size_t(ClampToAllowedEntries(aEntries)) * scBytesPerEntry;
+    const size_t evenChunkBytes = bufferBytes / scMinimumNumberOfChunks;
+    const size_t chunkBytes =
+        std::min(evenChunkBytes, size_t(scMaximumChunkSize));
+    return uint32_t(chunkBytes);
+  }
+
+ // Returns aFeatures restricted to AvailableFeatures(), with the features
+ // implied by FileIOAll / FileIO added. aFilterCount is not used.
+ static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
+ // Filter out any features unavailable in this platform/configuration.
+ aFeatures &= AvailableFeatures();
+
+ // Some features imply others.
+ if (aFeatures & ProfilerFeature::FileIOAll) {
+ aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
+ } else if (aFeatures & ProfilerFeature::FileIO) {
+ aFeatures |= ProfilerFeature::MainThreadIO;
+ }
+
+ return aFeatures;
+ }
+
+  // Builds the active profiler state: records the settings, creates the chunk
+  // manager and installs it in the core buffer, starts the sampler thread,
+  // and stores both the original and the lower-cased thread filters.
+  ActivePS(PSLockRef aLock, const TimeStamp& aProfilingStartTime,
+           PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+           const char** aFilters, uint32_t aFilterCount,
+           const Maybe<double>& aDuration)
+      : mProfilingStartTime(aProfilingStartTime),
+        mGeneration(sNextGeneration++),
+        mCapacity(aCapacity),
+        mDuration(aDuration),
+        mInterval(aInterval),
+        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
+        mProfileBufferChunkManager(
+            MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
+                size_t(ClampToAllowedEntries(aCapacity.Value())) *
+                    scBytesPerEntry,
+                ChunkSizeForEntries(aCapacity.Value()))),
+        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
+          ProfileChunkedBuffer& buffer = profiler_get_core_buffer();
+          buffer.SetChunkManager(*mProfileBufferChunkManager);
+          return buffer;
+        }()),
+        // The new sampler thread doesn't start sampling immediately because
+        // the main loop within Run() is blocked until this function's caller
+        // unlocks gPSMutex.
+        mSamplerThread(
+            NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
+        mIsPaused(false),
+        mIsSamplingPaused(false) {
+    // Deep copy and lower-case aFilters.
+    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
+    MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount));
+    for (uint32_t i = 0; i < aFilterCount; ++i) {
+      mFilters[i] = aFilters[i];
+      mFiltersLowered[i].reserve(mFilters[i].size());
+      // tolower() is only defined for EOF and values representable as
+      // unsigned char, so each char goes through unsigned char first; a
+      // plain (possibly negative) char with a non-ASCII byte would be
+      // undefined behaviour.
+      std::transform(mFilters[i].cbegin(), mFilters[i].cend(),
+                     std::back_inserter(mFiltersLowered[i]),
+                     [](unsigned char aChar) {
+                       return static_cast<char>(::tolower(aChar));
+                     });
+    }
+  }
+
+ // Detaches the chunk manager from the core buffer, unless ownership of the
+ // manager has already been moved out of mProfileBufferChunkManager.
+ ~ActivePS() {
+ if (mProfileBufferChunkManager) {
+ // We still control the chunk manager, remove it from the core buffer.
+ profiler_get_core_buffer().ResetChunkManager();
+ }
+ }
+
+  // Returns true if a thread named aThreadName should be profiled: there are
+  // no filters at all, or at least one filter is "*", is a case-insensitive
+  // substring of the thread name, or is accepted by FilterHasPid().
+  bool ThreadSelected(const char* aThreadName) {
+    if (mFiltersLowered.empty()) {
+      return true;
+    }
+
+    std::string name = aThreadName;
+    // tolower() is only defined for EOF and values representable as unsigned
+    // char, so each char goes through unsigned char first; a plain (possibly
+    // negative) char with a non-ASCII byte would be undefined behaviour.
+    std::transform(name.begin(), name.end(), name.begin(),
+                   [](unsigned char aChar) {
+                     return static_cast<char>(::tolower(aChar));
+                   });
+
+    for (const auto& filter : mFiltersLowered) {
+      if (filter == "*") {
+        return true;
+      }
+
+      // Crude, non UTF-8 compatible, case insensitive substring search
+      if (name.find(filter) != std::string::npos) {
+        return true;
+      }
+
+      // If the filter is "pid:<my pid>", profile all threads.
+      if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+ public:
+ // Allocates the singleton with the given settings; asserts that it does not
+ // exist yet.
+ static void Create(PSLockRef aLock, const TimeStamp& aProfilingStartTime,
+ PowerOfTwo32 aCapacity, double aInterval,
+ uint32_t aFeatures, const char** aFilters,
+ uint32_t aFilterCount, const Maybe<double>& aDuration) {
+ MOZ_ASSERT(!sInstance);
+ sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval,
+ aFeatures, aFilters, aFilterCount, aDuration);
+ }
+
+ // Deletes the singleton and returns its SamplerThread pointer, which this
+ // class does not destroy itself.
+ [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
+   MOZ_ASSERT(sInstance);
+   // Read the pointer before the instance goes away.
+   SamplerThread* const releasedSampler = sInstance->mSamplerThread;
+
+   delete sInstance;
+   sInstance = nullptr;
+   return releasedSampler;
+ }
+
+ // True while the singleton instance is set.
+ static bool Exists(PSLockRef) { return sInstance != nullptr; }
+
+ // Reports whether the current instance was created with exactly these
+ // settings. Filters are compared in order and case-sensitively.
+ static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
+                    const Maybe<double>& aDuration, double aInterval,
+                    uint32_t aFeatures, const char** aFilters,
+                    uint32_t aFilterCount) {
+   MOZ_ASSERT(sInstance);
+   const ActivePS& active = *sInstance;
+
+   // Scalar settings first; the first mismatch decides.
+   if (active.mCapacity != aCapacity) {
+     return false;
+   }
+   if (active.mDuration != aDuration) {
+     return false;
+   }
+   if (active.mInterval != aInterval) {
+     return false;
+   }
+   if (active.mFeatures != aFeatures) {
+     return false;
+   }
+
+   const auto& storedFilters = active.mFilters;
+   if (storedFilters.length() != aFilterCount) {
+     return false;
+   }
+   for (uint32_t index = 0; index < storedFilters.length(); ++index) {
+     if (storedFilters[index].compare(aFilters[index]) != 0) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ // Memory reporting: the instance itself plus what mProfileBuffer reports
+ // through SizeOfExcludingThis().
+ static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
+   MOZ_ASSERT(sInstance);
+
+   const size_t instanceBytes = aMallocSizeOf(sInstance);
+   const size_t bufferBytes =
+       sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
+
+   // Not measured yet; may be added later if DMD finds it is worthwhile:
+   // - mLiveProfiledThreads (both the array itself, and the contents)
+   // - mDeadProfiledThreads (both the array itself, and the contents)
+
+   return instanceBytes + bufferBytes;
+ }
+
+ // Moves the chunk manager out of the instance and returns it; the member is
+ // left in its moved-from state.
+ static UniquePtr<ProfileBufferChunkManagerWithLocalLimit>
+ ExtractBaseProfilerChunkManager(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   UniquePtr<ProfileBufferChunkManagerWithLocalLimit> extracted =
+       std::move(sInstance->mProfileBufferChunkManager);
+   return extracted;
+ }
+
+ // Applies the instance's thread filters to the name held by aInfo.
+ static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
+   MOZ_ASSERT(sInstance);
+   const char* const threadName = aInfo->Name();
+   return sInstance->ThreadSelected(threadName);
+ }
+
+ PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime)  // Called with no lock argument by detail::GetProfilingStartTime().
+
+ PS_GET(uint32_t, Generation)  // NOTE(review): PS_GET is defined outside this excerpt; presumably a static getter taking PSLockRef for mGeneration - confirm.
+
+ PS_GET(PowerOfTwo32, Capacity)  // Presumably reads mCapacity.
+
+ PS_GET(Maybe<double>, Duration)  // Presumably reads mDuration.
+
+ PS_GET(double, Interval)  // Presumably reads mInterval.
+
+ PS_GET(uint32_t, Features)  // Presumably reads mFeatures.
+
+#define PS_GET_FEATURE(n_, str_, Name_, desc_) \
+ static bool Feature##Name_(PSLockRef) { \
+ MOZ_ASSERT(sInstance); \
+ return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
+ }
+
+ BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)  // Presumably expands PS_GET_FEATURE once per feature, giving one Feature<Name>(PSLockRef) predicate each - confirm against the macro's definition.
+
+#undef PS_GET_FEATURE  // The helper macro is only needed for the expansion above.
+
+ PS_GET(const Vector<std::string>&, Filters)  // Presumably returns mFilters (the filters as copied in the constructor).
+ PS_GET(const Vector<std::string>&, FiltersLowered)  // Presumably returns mFiltersLowered (the lower-cased copies used by ThreadSelected()).
+
+ // Forwards to the chunk manager's FulfillChunkRequests(), provided the
+ // instance still holds a chunk manager.
+ static void FulfillChunkRequests(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   auto& chunkManager = sInstance->mProfileBufferChunkManager;
+   if (!chunkManager) {
+     return;
+   }
+   chunkManager->FulfillChunkRequests();
+ }
+
+ // Gives access to the instance's profile buffer.
+ static ProfileBuffer& Buffer(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   ProfileBuffer& profileBuffer = sInstance->mProfileBuffer;
+   return profileBuffer;
+ }
+
+ // Read-only view of the entries in mLiveProfiledThreads.
+ static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   const Vector<LiveProfiledThreadData>& liveThreads =
+       sInstance->mLiveProfiledThreads;
+   return liveThreads;
+ }
+
+ // Collects (RegisteredThread*, ProfiledThreadData*) pairs for every thread
+ // that belongs in a profile: threads that are still registered, and threads
+ // that were unregistered but still have data in the buffer. For the latter
+ // the RegisteredThread pointer is null.
+ // The result is sorted by thread register time.
+ // Do not keep the result across thread registration or profiler restarts.
+ static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
+ ProfiledThreads(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   using ThreadPair = std::pair<RegisteredThread*, ProfiledThreadData*>;
+
+   auto& liveThreads = sInstance->mLiveProfiledThreads;
+   auto& deadThreads = sInstance->mDeadProfiledThreads;
+
+   Vector<ThreadPair> array;
+   MOZ_RELEASE_ASSERT(
+       array.initCapacity(liveThreads.length() + deadThreads.length()));
+
+   for (auto& liveThread : liveThreads) {
+     MOZ_RELEASE_ASSERT(array.append(ThreadPair(
+         liveThread.mRegisteredThread, liveThread.mProfiledThreadData.get())));
+   }
+   for (auto& deadThread : deadThreads) {
+     // Unregistered threads have no RegisteredThread any more.
+     MOZ_RELEASE_ASSERT(array.append(ThreadPair(
+         static_cast<RegisteredThread*>(nullptr), deadThread.get())));
+   }
+
+   const auto registeredEarlier = [](const ThreadPair& aLeft,
+                                     const ThreadPair& aRight) {
+     return aLeft.second->Info()->RegisterTime() <
+            aRight.second->Info()->RegisterTime();
+   };
+   std::sort(array.begin(), array.end(), registeredEarlier);
+   return array;
+ }
+
+ // Returns the pages registered in CorePS followed by the dead pages kept by
+ // this instance. Unlike threads, pages are not sorted because they are not
+ // shown as a list.
+ static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
+   MOZ_ASSERT(sInstance);
+   Vector<RefPtr<PageInformation>> array;
+
+   const auto appendAll = [&array](auto& aPages) {
+     for (auto& page : aPages) {
+       MOZ_RELEASE_ASSERT(array.append(page));
+     }
+   };
+   appendAll(CorePS::RegisteredPages(aLock));
+   appendAll(sInstance->mDeadProfiledPages);
+
+   return array;
+ }
+
+ // Linear search of mLiveProfiledThreads for the entry that belongs to
+ // aRegisteredThread. Returns its ProfiledThreadData, or null if there is no
+ // such entry.
+ static ProfiledThreadData* GetProfiledThreadData(
+     PSLockRef, RegisteredThread* aRegisteredThread) {
+   MOZ_ASSERT(sInstance);
+   const auto& liveThreads = sInstance->mLiveProfiledThreads;
+   const auto match =
+       std::find_if(liveThreads.begin(), liveThreads.end(),
+                    [aRegisteredThread](const LiveProfiledThreadData& aEntry) {
+                      return aEntry.mRegisteredThread == aRegisteredThread;
+                    });
+   if (match == liveThreads.end()) {
+     return nullptr;
+   }
+   return match->mProfiledThreadData.get();
+ }
+
+ // Appends a live entry for aRegisteredThread, taking over
+ // aProfiledThreadData, and returns a non-owning pointer to that data.
+ static ProfiledThreadData* AddLiveProfiledThread(
+     PSLockRef, RegisteredThread* aRegisteredThread,
+     UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
+   MOZ_ASSERT(sInstance);
+   auto& liveThreads = sInstance->mLiveProfiledThreads;
+   MOZ_RELEASE_ASSERT(liveThreads.append(LiveProfiledThreadData{
+       aRegisteredThread, std::move(aProfiledThreadData)}));
+
+   // The vector entry keeps ownership; callers only get a weak pointer.
+   return liveThreads.back().mProfiledThreadData.get();
+ }
+
+ // Moves the ProfiledThreadData of aRegisteredThread from the live list to
+ // the dead list, after recording the buffer position at which the thread was
+ // unregistered. Expired dead threads are discarded first. Does nothing more
+ // if the thread has no live entry.
+ static void UnregisterThread(PSLockRef aLockRef,
+                              RegisteredThread* aRegisteredThread) {
+   MOZ_ASSERT(sInstance);
+
+   DiscardExpiredDeadProfiledThreads(aLockRef);
+
+   auto& liveThreads = sInstance->mLiveProfiledThreads;
+   for (LiveProfiledThreadData& entry : liveThreads) {
+     if (entry.mRegisteredThread != aRegisteredThread) {
+       continue;
+     }
+     entry.mProfiledThreadData->NotifyUnregistered(
+         sInstance->mProfileBuffer.BufferRangeEnd());
+     MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
+         std::move(entry.mProfiledThreadData)));
+     // Erasing invalidates the iteration, so leave immediately afterwards.
+     liveThreads.erase(&entry);
+     return;
+   }
+ }
+
+ PS_GET_AND_SET(bool, IsPaused)  // Provides IsPaused(PSLockRef), used by IsSamplingPaused(); presumably also SetIsPaused() - the macro is defined outside this excerpt.
+
+ // True if sampling is paused, either through the generic `SetIsPaused()` or
+ // through the specific `SetIsSamplingPaused()`.
+ static bool IsSamplingPaused(PSLockRef lock) {
+   MOZ_ASSERT(sInstance);
+   if (IsPaused(lock)) {
+     return true;
+   }
+   return sInstance->mIsSamplingPaused;
+ }
+
+ // Stores the sampling-only pause flag; the full-pause flag is untouched.
+ static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
+   MOZ_ASSERT(sInstance);
+   bool& samplingPausedFlag = sInstance->mIsSamplingPaused;
+   samplingPausedFlag = aIsSamplingPaused;
+ }
+
+ // Drops every dead thread whose unregistration position lies before the
+ // start of the current buffer range.
+ static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   const uint64_t rangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+
+   const auto unregisteredBeforeRange =
+       [rangeStart](const UniquePtr<ProfiledThreadData>& aThreadData) {
+         Maybe<uint64_t> unregisteredAt =
+             aThreadData->BufferPositionWhenUnregistered();
+         MOZ_RELEASE_ASSERT(unregisteredAt,
+                            "should have unregistered this thread");
+         return *unregisteredAt < rangeStart;
+       };
+   sInstance->mDeadProfiledThreads.eraseIf(unregisteredBeforeRange);
+ }
+
+ // Moves every registered page whose inner window ID matches into the dead
+ // page list, after recording the buffer position at unregistration.
+ static void UnregisterPage(PSLockRef aLock,
+                            uint64_t aRegisteredInnerWindowID) {
+   MOZ_ASSERT(sInstance);
+   auto& pages = CorePS::RegisteredPages(aLock);
+   size_t index = 0;
+   while (index < pages.length()) {
+     RefPtr<PageInformation>& page = pages[index];
+     if (page->InnerWindowID() != aRegisteredInnerWindowID) {
+       ++index;
+       continue;
+     }
+     page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
+     MOZ_RELEASE_ASSERT(
+         sInstance->mDeadProfiledPages.append(std::move(page)));
+     // After erasing, the next candidate sits at the same index.
+     pages.erase(&pages[index]);
+   }
+ }
+
+ // Drops every dead page whose unregistration position lies before the start
+ // of the current buffer range.
+ static void DiscardExpiredPages(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   const uint64_t rangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+
+   const auto unregisteredBeforeRange =
+       [rangeStart](const RefPtr<PageInformation>& aPage) {
+         Maybe<uint64_t> unregisteredAt =
+             aPage->BufferPositionWhenUnregistered();
+         MOZ_RELEASE_ASSERT(unregisteredAt,
+                            "should have unregistered this page");
+         return *unregisteredAt < rangeStart;
+       };
+   sInstance->mDeadProfiledPages.eraseIf(unregisteredBeforeRange);
+ }
+
+ // Empties the list of dead (unregistered) pages.
+ static void ClearUnregisteredPages(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   auto& deadPages = sInstance->mDeadProfiledPages;
+   deadPages.clear();
+ }
+
+ // Drops exit profiles that were gathered before the start of the current
+ // buffer range.
+ static void ClearExpiredExitProfiles(PSLockRef) {
+   MOZ_ASSERT(sInstance);
+   const uint64_t rangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+
+   const auto gatheredBeforeRange = [rangeStart](const ExitProfile& aEntry) {
+     return aEntry.mBufferPositionAtGatherTime < rangeStart;
+   };
+   sInstance->mExitProfiles.eraseIf(gatheredBeforeRange);
+ }
+
+ // Stores aExitProfile together with the current end of the buffer range.
+ // Expired exit profiles are pruned first.
+ static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) {
+   MOZ_ASSERT(sInstance);
+
+   ClearExpiredExitProfiles(aLock);
+
+   ExitProfile gathered{aExitProfile,
+                        sInstance->mProfileBuffer.BufferRangeEnd()};
+   MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(std::move(gathered)));
+ }
+
+ // Hands the stored, non-expired exit profiles to the caller and leaves the
+ // internal list empty.
+ static Vector<std::string> MoveExitProfiles(PSLockRef aLock) {
+   MOZ_ASSERT(sInstance);
+
+   ClearExpiredExitProfiles(aLock);
+
+   auto& stored = sInstance->mExitProfiles;
+   Vector<std::string> profiles;
+   MOZ_RELEASE_ASSERT(profiles.initCapacity(stored.length()));
+   for (ExitProfile& entry : stored) {
+     MOZ_RELEASE_ASSERT(profiles.append(std::move(entry.mJSON)));
+   }
+   stored.clear();
+   return profiles;
+ }
+
+ private:
+ // The singleton instance.
+ static ActivePS* sInstance;
+
+ const TimeStamp mProfilingStartTime;
+
+ // We need to track activity generations. If we didn't we could have the
+ // following scenario.
+ //
+ // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
+ // gPSMutex, deletes the SamplerThread (which does a join).
+ //
+ // - profiler_start() runs on a different thread, locks gPSMutex,
+ // re-instantiates ActivePS, unlocks gPSMutex -- all before the join
+ // completes.
+ //
+ // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
+ // and continues as if the start/stop pair didn't occur. Also
+ // profiler_stop() is stuck, unable to finish.
+ //
+ // By checking ActivePS *and* the generation, we can avoid this scenario.
+ // sNextGeneration is used to track the next generation number; it is static
+ // because it must persist across different ActivePS instantiations.
+ const uint32_t mGeneration;
+ static uint32_t sNextGeneration;
+
+ // The maximum number of 8-byte entries in mProfileBuffer.
+ const PowerOfTwo32 mCapacity;
+
+ // The maximum duration of entries in mProfileBuffer, in seconds.
+ const Maybe<double> mDuration;
+
+ // The interval between samples, measured in milliseconds.
+ const double mInterval;
+
+ // The profile features that are enabled.
+ const uint32_t mFeatures;
+
+ // Substrings of names of threads we want to profile.
+ Vector<std::string> mFilters;
+ Vector<std::string> mFiltersLowered;
+
+ // The chunk manager used by `mProfileBuffer` below.
+ // May become null if it gets transferred to the Gecko Profiler.
+ UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager;
+
+ // The buffer into which all samples are recorded.
+ ProfileBuffer mProfileBuffer;
+
+ // ProfiledThreadData objects for any threads that were profiled at any point
+ // during this run of the profiler:
+ // - mLiveProfiledThreads contains all threads that are still registered, and
+ // - mDeadProfiledThreads contains all threads that have already been
+ // unregistered but for which there is still data in the profile buffer.
+ Vector<LiveProfiledThreadData> mLiveProfiledThreads;
+ Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
+
+ // Info on all the dead pages.
+ // Registered pages are being moved to this array after unregistration.
+ // We are keeping them in case we need them in the profile data.
+ // We are removing them when we ensure that we won't need them anymore.
+ Vector<RefPtr<PageInformation>> mDeadProfiledPages;
+
+ // The current sampler thread. This class is not responsible for destroying
+ // the SamplerThread object; the Destroy() method returns it so the caller
+ // can destroy it.
+ SamplerThread* const mSamplerThread;
+
+ // Is the profiler fully paused?
+ bool mIsPaused;
+
+ // Is the profiler periodic sampling paused?
+ bool mIsSamplingPaused;
+
+ struct ExitProfile {
+ std::string mJSON;  // The profile string given to AddExitProfile().
+ uint64_t mBufferPositionAtGatherTime;  // mProfileBuffer.BufferRangeEnd() when AddExitProfile() stored the entry.
+ };
+ Vector<ExitProfile> mExitProfiles;  // Pruned by ClearExpiredExitProfiles(), emptied by MoveExitProfiles().
+};
+
+ActivePS* ActivePS::sInstance = nullptr;  // Null until ActivePS::Create() runs, and again after ActivePS::Destroy().
+uint32_t ActivePS::sNextGeneration = 0;  // Static so that it persists across ActivePS instantiations.
+
+#undef PS_GET
+#undef PS_GET_LOCKLESS
+#undef PS_GET_AND_SET
+
+namespace detail {
+
+// Returns the start time of the active profiling session, or a
+// default-constructed TimeStamp when CorePS or ActivePS does not exist.
+TimeStamp GetProfilingStartTime() {
+  if (CorePS::Exists()) {
+    PSAutoLock lock;
+    if (ActivePS::Exists(lock)) {
+      return ActivePS::ProfilingStartTime();
+    }
+  }
+  return {};
+}
+
+// Takes the chunk manager away from the active profiling session. Returns
+// null when there is no active session.
+[[nodiscard]] MFBT_API UniquePtr<ProfileBufferChunkManagerWithLocalLimit>
+ExtractBaseProfilerChunkManager() {
+  PSAutoLock lock;
+  const bool sessionActive = ActivePS::Exists(lock);
+  if (MOZ_UNLIKELY(!sessionActive)) {
+    return nullptr;
+  }
+  return ActivePS::ExtractBaseProfilerChunkManager(lock);
+}
+
+} // namespace detail
+
+Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);  // Starts at 0, the same value SetInactive() stores.
+
+/* static */
+// Replaces the whole word with the Active bit combined with aFeatures.
+void RacyFeatures::SetActive(uint32_t aFeatures) {
+  const uint32_t activeWithFeatures = Active | aFeatures;
+  sActiveAndFeatures = activeWithFeatures;
+}
+
+/* static */
+// Clears every bit, including the Active bit.
+void RacyFeatures::SetInactive() {
+  const uint32_t noBits = 0;
+  sActiveAndFeatures = noBits;
+}
+
+/* static */
+// True if the Active bit is set.
+bool RacyFeatures::IsActive() {
+  const uint32_t af = sActiveAndFeatures;
+  return (af & Active) != 0;
+}
+
+/* static: sets the Paused bit with a single |= on the Atomic; other bits are kept. */
+void RacyFeatures::SetPaused() { sActiveAndFeatures |= Paused; }
+
+/* static: clears the Paused bit with a single &= on the Atomic; other bits are kept. */
+void RacyFeatures::SetUnpaused() { sActiveAndFeatures &= ~Paused; }
+
+/* static: sets the SamplingPaused bit with a single |= on the Atomic; other bits are kept. */
+void RacyFeatures::SetSamplingPaused() { sActiveAndFeatures |= SamplingPaused; }
+
+/* static: clears the SamplingPaused bit with a single &= on the Atomic; other bits are kept. */
+void RacyFeatures::SetSamplingUnpaused() {
+ sActiveAndFeatures &= ~SamplingPaused;
+}
+
+/* static */
+// True if the Active bit and at least one bit of aFeature are set.
+bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) {
+  // Read the atomic once so that both tests see the same value.
+  const uint32_t snapshot = sActiveAndFeatures;
+  const bool active = (snapshot & Active) != 0;
+  const bool hasFeature = (snapshot & aFeature) != 0;
+  return active && hasFeature;
+}
+
+/* static */
+// True if the Active bit is set and no bit of aFeature is set.
+bool RacyFeatures::IsActiveWithoutFeature(uint32_t aFeature) {
+  // Read the atomic once so that both tests see the same value.
+  const uint32_t snapshot = sActiveAndFeatures;
+  const bool active = (snapshot & Active) != 0;
+  const bool lacksFeature = (snapshot & aFeature) == 0;
+  return active && lacksFeature;
+}
+
+/* static */
+// True if the Active bit is set and the Paused bit is clear.
+bool RacyFeatures::IsActiveAndUnpaused() {
+  // Read the atomic once so that both tests see the same value.
+  const uint32_t snapshot = sActiveAndFeatures;
+  const bool active = (snapshot & Active) != 0;
+  const bool unpaused = (snapshot & Paused) == 0;
+  return active && unpaused;
+}
+
+/* static */
+// True if the Active bit is set and neither Paused nor SamplingPaused is set.
+bool RacyFeatures::IsActiveAndSamplingUnpaused() {
+  // Read the atomic once so that both tests see the same value.
+  const uint32_t snapshot = sActiveAndFeatures;
+  const bool active = (snapshot & Active) != 0;
+  const bool samplingRuns = (snapshot & (Paused | SamplingPaused)) == 0;
+  return active && samplingRuns;
+}
+
+// Each live thread has a RegisteredThread, and a reference to it is stored in
+// TLS. This class encapsulates that TLS slot, and keeps the separate
+// AutoProfilerLabel::sProfilingStack slot in step with it.
+class TLSRegisteredThread {
+ public:
+  // Initializes both TLS slots. Both init() calls always run; the result is
+  // true only if each of them succeeded.
+  static bool Init(PSLockRef) {
+    const bool threadSlotOk = sRegisteredThread.init();
+    const bool stackSlotOk = AutoProfilerLabel::sProfilingStack.init();
+    return threadSlotOk && stackSlotOk;
+  }
+
+  // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
+  static class RegisteredThread* RegisteredThread(PSLockRef) {
+    return sRegisteredThread.get();
+  }
+
+  // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
+  // Returns null when the TLS slot holds no RegisteredThread.
+  static class RacyRegisteredThread* RacyRegisteredThread() {
+    class RegisteredThread* const current = sRegisteredThread.get();
+    if (!current) {
+      return nullptr;
+    }
+    return &current->RacyRegisteredThread();
+  }
+
+  // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
+  // RacyRegisteredThread() can also be used to reach the ProfilingStack, but
+  // that is marginally slower because of an extra pointer indirection.
+  static ProfilingStack* Stack() {
+    return AutoProfilerLabel::sProfilingStack.get();
+  }
+
+  // Stores aRegisteredThread in TLS, then points the ProfilingStack slot at
+  // that thread's stack (or at null when aRegisteredThread is null).
+  static void SetRegisteredThread(PSLockRef,
+                                  class RegisteredThread* aRegisteredThread) {
+    sRegisteredThread.set(aRegisteredThread);
+
+    ProfilingStack* stack = nullptr;
+    if (aRegisteredThread) {
+      stack = &aRegisteredThread->RacyRegisteredThread().ProfilingStack();
+    }
+    AutoProfilerLabel::sProfilingStack.set(stack);
+  }
+
+ private:
+  // This is a non-owning reference to the RegisteredThread;
+  // CorePS::mRegisteredThreads is the owning reference. On thread
+  // deregistration, this reference is cleared and the RegisteredThread is
+  // destroyed.
+  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
+};
+
+MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;  // Out-of-class definition of the static member declared in TLSRegisteredThread.
+
+/* static */
+// Returns the value currently held by the sProfilingStack TLS slot.
+ProfilingStack* AutoProfilerLabel::GetProfilingStack() {
+  ProfilingStack* const stack = sProfilingStack.get();
+  return stack;
+}
+
+// Although you can access a thread's ProfilingStack via
+// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
+// directly to the ProfilingStack. Here's why.
+//
+// - We need to be able to push to and pop from the ProfilingStack in
+// AutoProfilerLabel.
+//
+// - The class functions are hot and must be defined in BaseProfiler.h so they
+// can be inlined.
+//
+// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
+// BaseProfiler.h.
+//
+// This second pointer isn't ideal, but does provide a way to satisfy those
+// constraints. TLSRegisteredThread is responsible for updating it.
+MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;
+
+namespace detail {
+
+// Returns the register time of the calling thread's RegisteredThread, or a
+// default-constructed TimeStamp when CorePS does not exist or the TLS slot
+// holds no RegisteredThread.
+[[nodiscard]] MFBT_API TimeStamp GetThreadRegistrationTime() {
+  if (!CorePS::Exists()) {
+    return {};
+  }
+
+  PSAutoLock lock;
+
+  if (RegisteredThread* const current =
+          TLSRegisteredThread::RegisteredThread(lock)) {
+    return current->Info()->RegisterTime();
+  }
+  return {};
+}
+
+} // namespace detail
+
+// The name of the main thread.
+static const char* const kMainThreadName = "GeckoMain";
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN sampling/unwinding code
+
+// Additional registers that have to be saved when thread is paused.
+#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \
+ defined(GP_ARCH_x86)
+# define UNWINDING_REGS_HAVE_ECX_EDX
+#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
+ defined(GP_PLAT_amd64_freebsd) || defined(GP_ARCH_amd64) || \
+ defined(__x86_64__)
+# define UNWINDING_REGS_HAVE_R10_R12
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+# define UNWINDING_REGS_HAVE_LR_R7
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
+ defined(GP_PLAT_arm64_freebsd) || defined(GP_ARCH_arm64) || \
+ defined(__aarch64__)
+# define UNWINDING_REGS_HAVE_LR_R11
+#endif
+
+// The registers used for stack unwinding and a few other sampling purposes.
+// The ctor only null-initializes every field (including mContext where it
+// exists); users are responsible for filling in real values.
+class Registers {
+ public:
+  Registers()
+      : mPC{nullptr},
+        mSP{nullptr},
+        mFP{nullptr}
+#if defined(UNWINDING_REGS_HAVE_ECX_EDX)
+        ,
+        mEcx{nullptr},
+        mEdx{nullptr}
+#elif defined(UNWINDING_REGS_HAVE_R10_R12)
+        ,
+        mR10{nullptr},
+        mR12{nullptr}
+#elif defined(UNWINDING_REGS_HAVE_LR_R7)
+        ,
+        mLR{nullptr},
+        mR7{nullptr}
+#elif defined(UNWINDING_REGS_HAVE_LR_R11)
+        ,
+        mLR{nullptr},
+        mR11{nullptr}
+#endif
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+        ,
+        // Previously left uninitialized; null it like the other fields so a
+        // freshly constructed object never carries an indeterminate pointer.
+        mContext{nullptr}
+#endif
+  {
+  }
+
+  // Zeroes the whole object, i.e. every field declared below.
+  void Clear() { memset(this, 0, sizeof(*this)); }
+
+  // These fields are filled in by
+  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
+  // samples, and by REGISTERS_SYNC_POPULATE for synchronous samples.
+  Address mPC;  // Instruction pointer.
+  Address mSP;  // Stack pointer.
+  Address mFP;  // Frame pointer.
+#if defined(UNWINDING_REGS_HAVE_ECX_EDX)
+  Address mEcx;  // Temp for return address.
+  Address mEdx;  // Temp for frame pointer.
+#elif defined(UNWINDING_REGS_HAVE_R10_R12)
+  Address mR10;  // Temp for return address.
+  Address mR12;  // Temp for frame pointer.
+#elif defined(UNWINDING_REGS_HAVE_LR_R7)
+  Address mLR;  // ARM link register, or temp for return address.
+  Address mR7;  // Temp for frame pointer.
+#elif defined(UNWINDING_REGS_HAVE_LR_R11)
+  Address mLR;   // ARM link register, or temp for return address.
+  Address mR11;  // Temp for frame pointer.
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  // This contains all the registers, which means it duplicates the fields
+  // above. This is ok.
+  ucontext_t* mContext;  // The context from the signal handler.
+#endif
+};
+
+// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
+// looping on corrupted stacks.
+static const size_t MAX_NATIVE_FRAMES = 1024;
+
+// Fixed-capacity record of a native stack walk: parallel arrays of program
+// counters and stack pointers plus the number of entries filled. Everything
+// starts out zeroed.
+struct NativeStack {
+  void* mPCs[MAX_NATIVE_FRAMES] = {};
+  void* mSPs[MAX_NATIVE_FRAMES] = {};
+  size_t mCount = 0;  // Number of frames filled.
+
+  NativeStack() {}
+};
+
+// Merges aRegisteredThread's profiling stack with aNativeStack, outputting the
+// frames to aCollector. aFeatures and aRegs are not read by this function.
+static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
+ const RegisteredThread& aRegisteredThread,
+ const Registers& aRegs, const NativeStack& aNativeStack,
+ ProfilerStackCollector& aCollector) {
+ // WARNING: this function runs within the profiler's "critical section".
+ // WARNING: this function might be called while the profiler is inactive, and
+ // cannot rely on ActivePS.
+
+ const ProfilingStack& profilingStack =
+ aRegisteredThread.RacyRegisteredThread().ProfilingStack();
+ const ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
+ uint32_t profilingStackFrameCount = profilingStack.stackSize();
+
+ Maybe<uint64_t> samplePosInBuffer;
+ if (!aIsSynchronous) {
+ // aCollector.SamplePositionInBuffer() will return Nothing() when
+ // profiler_suspend_and_sample_thread is called from the background hang
+ // reporter.
+ samplePosInBuffer = aCollector.SamplePositionInBuffer();  // NOTE(review): this value is not read again in this function.
+ }
+ // While the profiling stack array is ordered oldest-to-youngest, the native
+ // array is ordered youngest-to-oldest. We must add frames to aCollector
+ // oldest-to-youngest. Thus, iterate over the profiling stack forwards and
+ // the native array backwards. Note: this means the terminating condition for
+ // nativeIndex is being < 0.
+ uint32_t profilingStackIndex = 0;
+ int32_t nativeIndex = aNativeStack.mCount - 1;
+
+ uint8_t* lastLabelFrameStackAddr = nullptr;
+
+ // Iterate as long as there is at least one frame remaining.
+ while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) {
+ // There are 1 or 2 frames available (profiling, native). Add the oldest.
+ uint8_t* profilingStackAddr = nullptr;
+ uint8_t* nativeStackAddr = nullptr;
+
+ if (profilingStackIndex != profilingStackFrameCount) {
+ const ProfilingStackFrame& profilingStackFrame =
+ profilingStackFrames[profilingStackIndex];
+
+ if (profilingStackFrame.isLabelFrame() ||
+ profilingStackFrame.isSpMarkerFrame()) {
+ lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
+ }
+
+ // Skip any JS_OSR frames. Such frames are used when the JS interpreter
+ // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
+ // To avoid both the profiling stack frame and jit frame being recorded
+ // (and showing up twice), the interpreter marks the interpreter
+ // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
+ if (profilingStackFrame.isOSRFrame()) {
+ profilingStackIndex++;
+ continue;
+ }
+
+ MOZ_ASSERT(lastLabelFrameStackAddr);
+ profilingStackAddr = lastLabelFrameStackAddr;  // Other frame kinds reuse the address of the last label/sp-marker frame.
+ }
+
+ if (nativeIndex >= 0) {
+ nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
+ }
+
+ // If there's a native stack frame which has the same SP as a profiling
+ // stack frame, pretend we didn't see the native stack frame: it is
+ // dropped by clearing its address and stepping nativeIndex past it.
+ // In effect this means profiling stack frames trump conflicting native
+ // frames.
+ if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) {
+ nativeStackAddr = nullptr;
+ nativeIndex--;
+ MOZ_ASSERT(profilingStackAddr);
+ }
+
+ // Sanity checks.
+ MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr);
+ MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr);
+
+ // The profiling stack frame goes first if its stack address is greater.
+ if (profilingStackAddr > nativeStackAddr) {
+ MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
+ const ProfilingStackFrame& profilingStackFrame =
+ profilingStackFrames[profilingStackIndex];
+
+ // Sp marker frames are just annotations and should not be recorded in
+ // the profile.
+ if (!profilingStackFrame.isSpMarkerFrame()) {
+ if (aIsSynchronous && profilingStackFrame.categoryPair() ==
+ ProfilingCategoryPair::PROFILER) {
+ // For stacks captured synchronously (ie. marker stacks), stop
+ // walking the stack as soon as we enter the profiler category,
+ // to avoid showing profiler internal code in marker stacks.
+ return;
+ }
+ aCollector.CollectProfilingStackFrame(profilingStackFrame);
+ }
+ profilingStackIndex++;
+ continue;
+ }
+
+ // If we reach here, there must be a native stack frame and it must be the
+ // greatest frame.
+ if (nativeStackAddr) {
+ MOZ_ASSERT(nativeIndex >= 0);
+ void* addr = (void*)aNativeStack.mPCs[nativeIndex];
+ aCollector.CollectNativeLeafAddr(addr);
+ }
+ if (nativeIndex >= 0) {
+ nativeIndex--;  // Step past the native frame whether or not it was collected.
+ }
+ }
+}
+
+#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
+static HANDLE GetThreadHandle(PlatformData* aData);
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+// Stack-walker callback: appends one (PC, SP) pair to the NativeStack passed
+// as aClosure. aFrameNumber is ignored. Frames that arrive once the arrays
+// are full are dropped.
+static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
+                              void* aClosure) {
+  NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
+  MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
+  if (nativeStack->mCount >= MAX_NATIVE_FRAMES) {
+    // The assertion above is debug-only. In release builds, refuse to write
+    // past the end of mSPs/mPCs if a walker delivers too many frames.
+    return;
+  }
+  nativeStack->mSPs[nativeStack->mCount] = aSP;
+  nativeStack->mPCs[nativeStack->mCount] = aPC;
+  nativeStack->mCount++;
+}
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK)
+// Fills aNativeStack by frame-pointer walking: the sampled PC/SP become the
+// first entry, then FramePointerStackWalk() adds the callers, provided the
+// frame pointer lies between the stack pointer and the thread's stack top.
+static void DoFramePointerBacktrace(PSLockRef aLock,
+                                    const RegisteredThread& aRegisteredThread,
+                                    const Registers& aRegs,
+                                    NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  // cannot rely on ActivePS.
+
+  // Record the current function first. Frame number 0 is used because
+  // FramePointerStackWalk() below numbers its frames 1..N; the value is
+  // irrelevant anyway since StackWalkCallback() ignores it.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  const uint32_t remainingFrames =
+      uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
+
+  const void* const stackTop = aRegisteredThread.StackTop();
+  const bool framePointerInStack =
+      aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackTop;
+  if (!framePointerInStack) {
+    return;
+  }
+  FramePointerStackWalk(StackWalkCallback, remainingFrames, &aNativeStack,
+                        reinterpret_cast<void**>(aRegs.mFP),
+                        const_cast<void*>(stackTop));
+}
+#endif
+
+#if defined(USE_MOZ_STACK_WALK)
+// Fills aNativeStack using MozStackWalkThread(): the sampled PC/SP become the
+// first entry, then the walker adds frames for the thread's handle.
+static void DoMozStackWalkBacktrace(PSLockRef aLock,
+                                    const RegisteredThread& aRegisteredThread,
+                                    const Registers& aRegs,
+                                    NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  // cannot rely on ActivePS.
+
+  // Record the current function first. Frame number 0 is used because
+  // MozStackWalkThread() below numbers its frames 1..N; the value is
+  // irrelevant anyway since StackWalkCallback() ignores it.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  const uint32_t remainingFrames =
+      uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
+
+  HANDLE targetThread = GetThreadHandle(aRegisteredThread.GetPlatformData());
+  MOZ_ASSERT(targetThread);
+  MozStackWalkThread(StackWalkCallback, remainingFrames, &aNativeStack,
+                     targetThread,
+                     /* context */ nullptr);
+}
+#endif
+
+#ifdef USE_EHABI_STACKWALK
+// Fills aNativeStack with EHABIStackWalk(), starting from the machine context
+// in aRegs and bounded by the thread's stack top; the walker's return value
+// becomes the frame count.
+static void DoEHABIBacktrace(PSLockRef aLock,
+                             const RegisteredThread& aRegisteredThread,
+                             const Registers& aRegs,
+                             NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  // cannot rely on ActivePS.
+
+  void* const stackTop = const_cast<void*>(aRegisteredThread.StackTop());
+  const auto framesWalked =
+      EHABIStackWalk(aRegs.mContext->uc_mcontext, stackTop, aNativeStack.mSPs,
+                     aNativeStack.mPCs, MAX_NATIVE_FRAMES);
+  aNativeStack.mCount = framesWalked;
+}
+#endif
+
+#ifdef USE_LUL_STACKWALK
+
+// See the comment at the callsite for why this function is necessary.
+# if defined(MOZ_HAVE_ASAN_IGNORE)
+// Byte-by-byte copy of aLen bytes from aSrc to aDst.
+MOZ_ASAN_IGNORE static void ASAN_memcpy(void* aDst, const void* aSrc,
+                                        size_t aLen) {
+  // Calling memcpy() would be the obvious choice. However, although
+  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
+  // false positive still manifests! So the copy is spelled out by hand here.
+  char* out = static_cast<char*>(aDst);
+  const char* in = static_cast<const char*>(aSrc);
+
+  for (const char* const end = in + aLen; in != end; ++in, ++out) {
+    *out = *in;
+  }
+}
+# endif
+
+static void DoLULBacktrace(PSLockRef aLock,
+ const RegisteredThread& aRegisteredThread,
+ const Registers& aRegs, NativeStack& aNativeStack) {
+ // WARNING: this function runs within the profiler's "critical section".
+ // WARNING: this function might be called while the profiler is inactive, and
+ // cannot rely on ActivePS.
+
+ const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
+
+ lul::UnwindRegs startRegs;
+ memset(&startRegs, 0, sizeof(startRegs));
+
+# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+ startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
+ startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
+ startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
+# elif defined(GP_PLAT_amd64_freebsd)
+ startRegs.xip = lul::TaggedUWord(mc->mc_rip);
+ startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
+ startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
+# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+ startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
+ startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
+ startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
+ startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
+ startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
+ startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
+# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+ startRegs.pc = lul::TaggedUWord(mc->pc);
+ startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
+ startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
+ startRegs.sp = lul::TaggedUWord(mc->sp);
+# elif defined(GP_PLAT_arm64_freebsd)
+ startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
+ startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
+ startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
+ startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
+# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+ startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
+ startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
+ startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
+# elif defined(GP_PLAT_mips64_linux)
+ startRegs.pc = lul::TaggedUWord(mc->pc);
+ startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
+ startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
+# else
+# error "Unknown plat"
+# endif
+
+ // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
+ // stack's registered top point. Do some basic sanity checks too. This
+ // assumes that the TaggedUWord holding the stack pointer value is valid, but
+ // it should be, since it was constructed that way in the code just above.
+
+ // We could construct |stackImg| so that LUL reads directly from the stack in
+ // question, rather than from a copy of it. That would reduce overhead and
+ // space use a bit. However, it gives a problem with dynamic analysis tools
+ // (ASan, TSan, Valgrind) which is that such tools will report invalid or
+ // racing memory accesses, and such accesses will be reported deep inside LUL.
+ // By taking a copy here, we can either sanitise the copy (for Valgrind) or
+ // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
+ // to try and suppress errors inside LUL.
+ //
+ // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
+ // observed in some minutes of testing, whilst keeping the size of this
+ // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in
+ // practice are small, 4KB or less, and so the copy costs are insignificant
+ // compared to other profiler overhead.
+ //
+ // |stackImg| is allocated on this (the sampling thread's) stack. That
+ // implies that the frame for this function is at least N_STACK_BYTES large.
+ // In general it would be considered unacceptable to have such a large frame
+ // on a stack, but it only exists for the unwinder thread, and so is not
+ // expected to be a problem. Allocating it on the heap is troublesome because
+ // this function runs whilst the sampled thread is suspended, so any heap
+ // allocation risks deadlock. Allocating it as a global variable is not
+ // thread safe, which would be a problem if we ever allow multiple sampler
+ // threads. Hence allocating it on the stack seems to be the least-worst
+ // option.
+
+ lul::StackImage stackImg;
+
+ {
+# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
+ defined(GP_PLAT_amd64_freebsd)
+ uintptr_t rEDZONE_SIZE = 128;
+ uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
+ defined(GP_PLAT_arm64_freebsd)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_mips64_linux)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+# else
+# error "Unknown plat"
+# endif
+ uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
+ uintptr_t ws = sizeof(void*);
+ start &= ~(ws - 1);
+ end &= ~(ws - 1);
+ uintptr_t nToCopy = 0;
+ if (start < end) {
+ nToCopy = end - start;
+ if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
+ }
+ MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+ stackImg.mLen = nToCopy;
+ stackImg.mStartAvma = start;
+ if (nToCopy > 0) {
+ // If this is a vanilla memcpy(), ASAN makes the following complaint:
+ //
+ // ERROR: AddressSanitizer: stack-buffer-underflow ...
+ // ...
+ // HINT: this may be a false positive if your program uses some custom
+ // stack unwind mechanism or swapcontext
+ //
+ // This code is very much a custom stack unwind mechanism! So we use an
+ // alternative memcpy() implementation that is ignored by ASAN.
+# if defined(MOZ_HAVE_ASAN_IGNORE)
+ ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+# else
+ memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+# endif
+ (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
+ }
+ }
+
+ size_t framePointerFramesAcquired = 0;
+ lul::LUL* lul = CorePS::Lul(aLock);
+ lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
+ reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
+ &aNativeStack.mCount, &framePointerFramesAcquired,
+ MAX_NATIVE_FRAMES, &startRegs, &stackImg);
+
+ // Update stats in the LUL stats object. Unfortunately this requires
+ // three global memory operations.
+ lul->mStats.mContext += 1;
+ lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
+ lul->mStats.mFP += framePointerFramesAcquired;
+}
+
+#endif
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Hands the native stack walk for periodic and synchronous samples over to
+// whichever stack walker this build was configured with. (Backtrace samples
+// are treated differently, see profiler_suspend_and_sample_thread() for
+// details.)
+//
+// Only one walker is ever compiled in. The order of the checks below matters
+// in a single respect: LUL must come before FRAME_POINTER, because on Linux
+// both can be present.
+static void DoNativeBacktrace(PSLockRef aLock,
+                              const RegisteredThread& aRegisteredThread,
+                              const Registers& aRegs,
+                              NativeStack& aNativeStack) {
+#  ifdef USE_LUL_STACKWALK
+  DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  elif defined(USE_EHABI_STACKWALK)
+  DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  elif defined(USE_FRAME_POINTER_STACK_WALK)
+  DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  elif defined(USE_MOZ_STACK_WALK)
+  DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  else
+#    error "Invalid configuration"
+#  endif
+}
+#endif  // defined(HAVE_NATIVE_UNWIND)
+
+// Emits into aBuffer the entries that periodic and synchronous samples have in
+// common. (This should only be called from DoSyncSample() and
+// DoPeriodicSample().)
+//
+// When native unwinding is compiled in, the stack-walk feature is on and a
+// full stack was requested, the native stack is walked before the stacks are
+// merged. Otherwise the stacks are merged with an empty native stack and, if
+// a full stack was requested, only the leaf address (aRegs.mPC) is recorded.
+//
+// The grammar for entry sequences is in a comment above
+// ProfileBuffer::StreamSamplesToJSON.
+static inline void DoSharedSample(
+    PSLockRef aLock, bool aIsSynchronous, RegisteredThread& aRegisteredThread,
+    const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
+    ProfileBuffer& aBuffer,
+    StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(!aBuffer.IsThreadSafe(),
+             "Mutexes cannot be used inside this critical section");
+
+  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
+
+  ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
+  NativeStack nativeStack;
+
+  const bool wantFullStack = aCaptureOptions == StackCaptureOptions::Full;
+  bool walkedNativeStack = false;
+#if defined(HAVE_NATIVE_UNWIND)
+  if (ActivePS::FeatureStackWalk(aLock) && wantFullStack) {
+    DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack);
+    walkedNativeStack = true;
+  }
+#endif
+
+  MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
+              aRegs, nativeStack, collector);
+
+  // We couldn't walk the whole native stack, but we can record the top frame.
+  if (!walkedNativeStack && wantFullStack) {
+    aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
+  }
+}
+
+// Records one synchronous sample of aRegisteredThread into aBuffer: a thread
+// id entry, a time entry (aNow relative to the process start time), and then
+// everything DoSharedSample() adds.
+static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
+                         const TimeStamp& aNow, const Registers& aRegs,
+                         ProfileBuffer& aBuffer,
+                         StackCaptureOptions aCaptureOptions) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack,
+             "DoSyncSample should not be called when no capture is needed");
+
+  // Read the range start before anything is appended for this sample.
+  const uint64_t rangeStartBeforeSample = aBuffer.BufferRangeStart();
+
+  // The position of the thread id entry identifies the sample.
+  const uint64_t threadIdEntryPos =
+      aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
+
+  aBuffer.AddEntry(ProfileBufferEntry::Time(
+      (aNow - CorePS::ProcessStartTime()).ToMilliseconds()));
+
+  DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
+                 threadIdEntryPos, rangeStartBeforeSample, aBuffer,
+                 aCaptureOptions);
+}
+
+// Records the remainder of a periodic sample into aBuffer.
+// The ThreadId entry has already been written to the main ProfileBuffer at
+// position `aSamplePos`; the rest goes to `aBuffer`, which may be a different
+// buffer.
+static void DoPeriodicSample(PSLockRef aLock,
+                             RegisteredThread& aRegisteredThread,
+                             ProfiledThreadData& aProfiledThreadData,
+                             const Registers& aRegs, uint64_t aSamplePos,
+                             uint64_t aBufferRangeStart,
+                             ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  // A periodic sample is never synchronous and always asks for the full
+  // stack. Note that aProfiledThreadData is not used in this body.
+  constexpr bool kIsSynchronous = false;
+  DoSharedSample(aLock, kIsSynchronous, aRegisteredThread, aRegs, aSamplePos,
+                 aBufferRangeStart, aBuffer, StackCaptureOptions::Full);
+}
+
+#undef UNWINDING_REGS_HAVE_ECX_EDX
+#undef UNWINDING_REGS_HAVE_R10_R12
+#undef UNWINDING_REGS_HAVE_LR_R7
+#undef UNWINDING_REGS_HAVE_LR_R11
+
+// END sampling/unwinding code
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN saving/streaming code
+
+const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;  // 2^53 - 1, the upper bound SafeJSInteger() accepts.
+
+// Returns aValue as a signed 64-bit integer when it does not exceed
+// kJS_MAX_SAFE_UINTEGER, and -1 otherwise.
+static int64_t SafeJSInteger(uint64_t aValue) {
+  if (aValue > kJS_MAX_SAFE_UINTEGER) {
+    return -1;
+  }
+  return static_cast<int64_t>(aValue);
+}
+
+// Writes one shared library as a JSON object element: its address range and
+// offset (each passed through SafeJSInteger()), followed by its names, paths
+// and identifiers as strings.
+static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
+                                         const SharedLibrary& aLib) {
+  aWriter.StartObjectElement();
+
+  // Numeric properties.
+  const int64_t startAddress = SafeJSInteger(aLib.GetStart());
+  aWriter.IntProperty("start", startAddress);
+  const int64_t endAddress = SafeJSInteger(aLib.GetEnd());
+  aWriter.IntProperty("end", endAddress);
+  const int64_t fileOffset = SafeJSInteger(aLib.GetOffset());
+  aWriter.IntProperty("offset", fileOffset);
+
+  // String properties.
+  aWriter.StringProperty("name", aLib.GetModuleName());
+  aWriter.StringProperty("path", aLib.GetModulePath());
+  aWriter.StringProperty("debugName", aLib.GetDebugName());
+  aWriter.StringProperty("debugPath", aLib.GetDebugPath());
+  aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
+  aWriter.StringProperty("codeId", aLib.GetCodeId());
+  aWriter.StringProperty("arch", aLib.GetArch());
+
+  aWriter.EndObject();
+}
+
+// Streams every shared library reported by SharedLibraryInfo::GetInfoForSelf()
+// to aWriter, one object element per library, sorted by address.
+void AppendSharedLibraries(JSONWriter& aWriter) {
+  SharedLibraryInfo libraries = SharedLibraryInfo::GetInfoForSelf();
+  libraries.SortByAddress();
+
+  size_t index = 0;
+  while (index < libraries.GetSize()) {
+    AddSharedLibraryInfoToStream(aWriter, libraries.GetEntry(index));
+    ++index;
+  }
+}
+
+static void StreamCategories(SpliceableJSONWriter& aWriter) {
+  // Streams one entry per profiling category, in the same order as
+  // ProfilingCategory. Each entry carries the category's label, its color and
+  // the labels of its subcategories. Expected shape of the output:
+  // [
+  //   {
+  //     name: "Idle",
+  //     color: "transparent",
+  //     subcategories: ["Other"],
+  //   },
+  //   {
+  //     name: "Other",
+  //     color: "grey",
+  //     subcategories: [
+  //       "JSM loading",
+  //       "Subprocess launching",
+  //       "DLL loading"
+  //     ]
+  //   },
+  //   ...
+  // ]
+
+  // The three helpers below are handed to MOZ_PROFILING_CATEGORY_LIST, which
+  // expands them once per category / subcategory.
+#define STREAM_CATEGORY_JSON_OPEN(aName, aLabel, aColor) \
+  aWriter.Start();                                       \
+  aWriter.StringProperty("name", aLabel);                \
+  aWriter.StringProperty("color", aColor);               \
+  aWriter.StartArrayProperty("subcategories");
+#define STREAM_CATEGORY_JSON_SUB(aSuperCategory, aName, aLabel) \
+  aWriter.StringElement(aLabel);
+#define STREAM_CATEGORY_JSON_CLOSE \
+  aWriter.EndArray();              \
+  aWriter.EndObject();
+
+  MOZ_PROFILING_CATEGORY_LIST(STREAM_CATEGORY_JSON_OPEN,
+                              STREAM_CATEGORY_JSON_SUB,
+                              STREAM_CATEGORY_JSON_CLOSE)
+
+#undef STREAM_CATEGORY_JSON_OPEN
+#undef STREAM_CATEGORY_JSON_SUB
+#undef STREAM_CATEGORY_JSON_CLOSE
+}
+
+// Streams the display schema of every registered marker type to aWriter,
+// emitting each marker type name at most once.
+static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
+  // Array view over all registered marker-type-specific functions.
+  base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList
+      registeredMarkerTypes;
+
+  // Names streamed so far. Duplicates may come from the same code potentially
+  // living in different libraries.
+  std::set<std::string> streamedNames;
+
+  for (const auto& markerType : registeredMarkerTypes) {
+    auto typeName = markerType.mMarkerTypeNameFunction();
+
+    // std::set::emplace returns a pair; its `second` is false when an equal
+    // element was already present, i.e. this name has been streamed before.
+    if (!streamedNames.emplace(typeName.data(), typeName.size()).second) {
+      continue;
+    }
+
+    markerType.mMarkerSchemaFunction().Stream(aWriter, typeName);
+  }
+}
+
+static int64_t MicrosecondsSince1970();
+
+// Writes the properties of the profile's "meta" object to aWriter. The caller
+// is responsible for opening and closing the enclosing object.
+//
+// Version, timing information, categories and marker schema are always
+// written. The remaining properties (interval, stackwalk, debug, ...) are
+// only written when called on the main thread.
+static void StreamMetaJSCustomObject(PSLockRef aLock,
+                                     SpliceableJSONWriter& aWriter,
+                                     bool aIsShuttingDown) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  aWriter.IntProperty("version", GECKO_PROFILER_FORMAT_VERSION);
+
+  // "startTime" is the process start time expressed in milliseconds since
+  // midnight January 1, 1970 GMT. It is obtained as
+  // (Now - (Now - ProcessStartTime)), with the outer Now taken from the wall
+  // clock.
+  // Note: This is the only absolute time in the profile! All other timestamps
+  // are relative to this startTime.
+  TimeDuration processAge = TimeStamp::Now() - CorePS::ProcessStartTime();
+  const double wallClockNowMs = MicrosecondsSince1970() / 1000.0;
+  aWriter.DoubleProperty("startTime",
+                         wallClockNowMs - processAge.ToMilliseconds());
+
+  TimeDuration profilingStartOffset =
+      ActivePS::ProfilingStartTime() - CorePS::ProcessStartTime();
+  aWriter.DoubleProperty("profilingStartTime",
+                         profilingStartOffset.ToMilliseconds());
+
+  const TimeStamp contentEarliestTime =
+      ActivePS::Buffer(aLock)
+          .UnderlyingChunkedBuffer()
+          .GetEarliestChunkStartTimeStamp();
+  if (contentEarliestTime.IsNull()) {
+    aWriter.NullProperty("contentEarliestTime");
+  } else {
+    aWriter.DoubleProperty(
+        "contentEarliestTime",
+        (contentEarliestTime - CorePS::ProcessStartTime()).ToMilliseconds());
+  }
+
+  const double profilingEndTime = profiler_time();
+  aWriter.DoubleProperty("profilingEndTime", profilingEndTime);
+
+  // The shutdown time, when there is one, is the same instant as the
+  // profiling end time.
+  if (!aIsShuttingDown) {
+    aWriter.NullProperty("shutdownTime");
+  } else {
+    aWriter.DoubleProperty("shutdownTime", profilingEndTime);
+  }
+
+  aWriter.StartArrayProperty("categories");
+  StreamCategories(aWriter);
+  aWriter.EndArray();
+
+  aWriter.StartArrayProperty("markerSchema");
+  StreamMarkerSchema(aWriter);
+  aWriter.EndArray();
+
+  // Leave the rest of the properties out if we're not on the main thread.
+  // At the moment, the only case in which this function is called on a
+  // background thread is if we're in a content process and are going to
+  // send this profile to the parent process. In that case, the parent
+  // process profile's "meta" object already has the rest of the properties,
+  // and the parent process profile is dumped on that process's main thread.
+  if (!profiler_is_main_thread()) {
+    return;
+  }
+
+  aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
+  aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
+
+#ifdef DEBUG
+  constexpr int kIsDebugBuild = 1;
+#else
+  constexpr int kIsDebugBuild = 0;
+#endif
+  aWriter.IntProperty("debug", kIsDebugBuild);
+
+  // These three are always written as 0 here.
+  aWriter.IntProperty("gcpoison", 0);
+  aWriter.IntProperty("asyncstack", 0);
+  aWriter.IntProperty("processType", 0);
+}
+
+// Discards expired pages, then streams each remaining profiled page to
+// aWriter.
+static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  ActivePS::DiscardExpiredPages(aLock);
+
+  for (const auto& pageInfo : ActivePS::ProfiledPages(aLock)) {
+    pageInfo->StreamJSON(aWriter);
+  }
+}
+
+// Streams this process's profile to aWriter. Requires CorePS and ActivePS to
+// exist.
+//
+// With aOnlyThreads false, the output is: "libs", "meta", "pages", profiler
+// overhead, counters, the "threads" array and "pausedRanges". With
+// aOnlyThreads true, only the items of the threads array are written, without
+// the surrounding array.
+//
+// Once streaming is done, CollectionStart/CollectionEnd entries are added to
+// the profile buffer.
+static void locked_profiler_stream_json_for_this_process(
+    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
+    bool aIsShuttingDown, bool aOnlyThreads = false) {
+  LOG("locked_profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process);
+
+  const double collectionStartMs = profiler_time();
+
+  ProfileBuffer& profileBuffer = ActivePS::Buffer(aLock);
+
+  // If there is a set "Window length" (in seconds), discard older data.
+  if (Maybe<double> windowLengthS = ActivePS::Duration(aLock);
+      windowLengthS.isSome()) {
+    const double windowStartMs = collectionStartMs - *windowLengthS * 1000;
+    profileBuffer.DiscardSamplesBeforeTime(windowStartMs);
+  }
+
+  const bool streamWholeProfile = !aOnlyThreads;
+
+  if (streamWholeProfile) {
+    // Shared library info.
+    aWriter.StartArrayProperty("libs");
+    AppendSharedLibraries(aWriter);
+    aWriter.EndArray();
+
+    // Meta data.
+    aWriter.StartObjectProperty("meta");
+    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown);
+    aWriter.EndObject();
+
+    // Page data.
+    aWriter.StartArrayProperty("pages");
+    StreamPages(aLock, aWriter);
+    aWriter.EndArray();
+
+    profileBuffer.StreamProfilerOverheadToJSON(
+        aWriter, CorePS::ProcessStartTime(), aSinceTime);
+    profileBuffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(),
+                                       aSinceTime);
+
+    // Opens the list of per-thread profiles; it is closed further down.
+    aWriter.StartArrayProperty("threads");
+  }
+
+  // If aOnlyThreads is true, the only output will be the threads array items.
+  ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
+  Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> profiledThreads =
+      ActivePS::ProfiledThreads(aLock);
+  for (auto& threadPair : profiledThreads) {
+    threadPair.second->StreamJSON(profileBuffer, aWriter,
+                                  CorePS::ProcessName(aLock),
+                                  CorePS::ETLDplus1(aLock),
+                                  CorePS::ProcessStartTime(), aSinceTime);
+  }
+
+  if (streamWholeProfile) {
+    // Closes the "threads" array.
+    aWriter.EndArray();
+
+    aWriter.StartArrayProperty("pausedRanges");
+    profileBuffer.StreamPausedRangesToJSON(aWriter, aSinceTime);
+    aWriter.EndArray();
+  }
+
+  const double collectionEndMs = profiler_time();
+
+  // Record timestamps for the collection into the buffer, so that consumers
+  // know why we didn't collect any samples for its duration.
+  // We put these entries into the buffer after we've collected the profile,
+  // so they'll be visible for the *next* profile collection (if they haven't
+  // been overwritten due to buffer wraparound by then).
+  profileBuffer.AddEntry(
+      ProfileBufferEntry::CollectionStart(collectionStartMs));
+  profileBuffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
+}
+
+// Takes the profiler lock and, if the profiler is active, streams this
+// process's profile to aWriter. Returns true if the profile was streamed,
+// false if the profiler was not active.
+bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
+                                           double aSinceTime,
+                                           bool aIsShuttingDown,
+                                           bool aOnlyThreads) {
+  LOG("profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  const bool isActive = ActivePS::Exists(lock);
+  if (isActive) {
+    locked_profiler_stream_json_for_this_process(
+        lock, aWriter, aSinceTime, aIsShuttingDown, aOnlyThreads);
+  }
+  return isActive;
+}
+
+// END saving/streaming code
+////////////////////////////////////////////////////////////////////////
+
+// Returns the one-character tag printed next to a feature in PrintUsage():
+//   'D' / 'd'  feature is in DefaultFeatures(), available / unavailable
+//   'S' / 's'  feature is in StartupExtraDefaultFeatures(), available /
+//              unavailable
+//   '-' / 'x'  feature is in neither set, available / unavailable
+// "Available" means the feature is in AvailableFeatures().
+static char FeatureCategory(uint32_t aFeature) {
+  if (aFeature & DefaultFeatures()) {
+    return (aFeature & AvailableFeatures()) ? 'D' : 'd';
+  }
+
+  if (aFeature & StartupExtraDefaultFeatures()) {
+    return (aFeature & AvailableFeatures()) ? 'S' : 's';
+  }
+
+  return (aFeature & AvailableFeatures()) ? '-' : 'x';
+}
+
+// Prints the profiler's environment-variable help via PrintToConsole():
+// first the general variables (with the buffer-size limits and defaults
+// filled in), then one line per feature from BASE_PROFILER_FOR_EACH_FEATURE
+// tagged with FeatureCategory(), and finally the remaining variables and
+// whether this build has native unwinding.
+static void PrintUsage() {
+  PrintToConsole(
+      "\n"
+      "Profiler environment variable usage:\n"
+      "\n"
+      " MOZ_BASE_PROFILER_HELP\n"
+      " If set to any value, prints this message.\n"
+      " (Only BaseProfiler features are known here; Use MOZ_PROFILER_HELP\n"
+      " for Gecko Profiler help, with more features).\n"
+      "\n"
+      " MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE_}LOGGING\n"
+      " Enables BaseProfiler logging to stdout. The levels of logging\n"
+      " available are 'MOZ_BASE_PROFILER_LOGGING' (least verbose),\n"
+      " '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP\n"
+      " If set to any value other than '' or '0'/'N'/'n', starts the\n"
+      " profiler immediately on start-up.\n"
+      " Useful if you want to profile code that runs very early.\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
+      " If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n"
+      " per process in the profiler's circular buffer when the profiler is\n"
+      " first started.\n"
+      " If unset, the platform default is used:\n"
+      " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
+      " (%u bytes per entry -> %u or %u total bytes per process)\n"
+      " Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
+      " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time\n"
+      " of entries in the profiler's circular buffer when the profiler\n"
+      " is first started, in seconds.\n"
+      " If unset, the life time of the entries will only be restricted by\n"
+      " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
+      " additional time duration restriction will be applied.\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
+      " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
+      " measured in milliseconds, when the profiler is first started.\n"
+      " If unset, the platform default is used.\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
+      " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
+      " features, as the integer value of the features bitfield.\n"
+      " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
+      " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
+      " features, as a comma-separated list of strings.\n"
+      " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
+      " If unset, the platform default is used.\n"
+      "\n"
+      " Features: (x=unavailable, D/d=default/unavailable,\n"
+      " S/s=MOZ_PROFILER_STARTUP extra "
+      "default/unavailable)\n",
+      // Arguments, in the order of the %u conversions above: allowed range
+      // of entries, default entry counts, bytes per entry, default sizes in
+      // bytes.
+      unsigned(scMinimumBufferEntries), unsigned(scMaximumBufferEntries),
+      unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()),
+      unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
+      unsigned(scBytesPerEntry),
+      unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
+      unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() *
+               scBytesPerEntry));
+
+  // One line per feature: category tag, bit value, name and description.
+#define PRINT_FEATURE(n_, str_, Name_, desc_)             \
+  PrintToConsole(" %c %7u: \"%s\" (%s)\n",                \
+                 FeatureCategory(ProfilerFeature::Name_), \
+                 ProfilerFeature::Name_, str_, desc_);
+
+  BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
+
+#undef PRINT_FEATURE
+
+  PrintToConsole(
+      " - \"default\" (All above D+S defaults)\n"
+      "\n"
+      " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
+      " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as "
+      "a\n"
+      " comma-separated list of strings. A given thread will be sampled if\n"
+      " any of the filters is a case-insensitive substring of the thread\n"
+      " name. If unset, a default is used.\n"
+      "\n"
+      " MOZ_PROFILER_SHUTDOWN\n"
+      " If set, the profiler saves a profile to the named file on shutdown.\n"
+      "\n"
+      " MOZ_PROFILER_SYMBOLICATE\n"
+      " If set, the profiler will pre-symbolicate profiles.\n"
+      " *Note* This will add a significant pause when gathering data, and\n"
+      " is intended mainly for local development.\n"
+      "\n"
+      " MOZ_PROFILER_LUL_TEST\n"
+      " If set to any value, runs LUL unit tests at startup.\n"
+      "\n"
+      " This platform %s native unwinding.\n"
+      "\n",
+#if defined(HAVE_NATIVE_UNWIND)
+      "supports"
+#else
+      "does not support"
+#endif
+  );
+}
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+struct SigHandlerCoordinator;
+#endif
+
+// Sampler performs setup and teardown of the state required to sample with the
+// profiler. Sampler may exist when ActivePS is not present.
+//
+// SuspendAndSampleAndResumeThread must only be called from a single thread,
+// and must not sample the thread it is being called from. A separate Sampler
+// instance must be used for each thread which wants to capture samples.
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// With the exception of SamplerThread, all Sampler objects must be Disable-d
+// before releasing the lock which was used to create them. This avoids races
+// on linux with the SIGPROF signal handler.
+
+class Sampler {
+ public:
+ // Sets up the profiler such that it can begin sampling.
+ explicit Sampler(PSLockRef aLock);
+
+ // Disable the sampler, restoring it to its previous state. This must be
+ // called once, and only once, before the Sampler is destroyed.
+ void Disable(PSLockRef aLock);
+
+ // Suspends the samplee thread, calls aProcessRegs (passing it a populated
+ // |const Registers&| arg) while it is suspended, then resumes the thread.
+ //
+ // Func must be callable as `void(const Registers&, const TimeStamp&)`; see
+ // the lambda passed by SamplerThread::Run().
+ template <typename Func>
+ void SuspendAndSampleAndResumeThread(
+ PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+ const TimeStamp& aNow, const Func& aProcessRegs);
+
+ private:
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+ // Used to restore the SIGPROF handler when ours is removed.
+ struct sigaction mOldSigprofHandler;
+
+ // This process' ID. Needed as an argument for tgkill in
+ // SuspendAndSampleAndResumeThread.
+ BaseProfilerProcessId mMyPid;
+
+ // The sampler thread's ID. Used to assert that it is not sampling itself,
+ // which would lead to deadlock.
+ BaseProfilerThreadId mSamplerTid;
+
+ public:
+ // This is the one-and-only variable used to communicate between the sampler
+ // thread and the samplee thread's signal handler. It's static because the
+ // samplee thread's signal handler is static.
+ static struct SigHandlerCoordinator* sSigHandlerCoordinator;
+#endif
+};
+
+// END Sampler
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread
+
+// The sampler thread controls sampling and runs whenever the profiler is
+// active. It periodically runs through all registered threads, finds those
+// that should be sampled, then pauses and samples them.
+
+class SamplerThread {
+ public:
+  // Constructs the sampler thread object; the thread is not started here.
+  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                double aIntervalMilliseconds, uint32_t aFeatures);
+  ~SamplerThread();
+
+  // The body of the sampler thread; this runs on (is!) the sampler thread.
+  void Run();
+
+  // This runs on the main thread.
+  void Stop(PSLockRef aLock);
+
+ private:
+  // Suspends the calling thread for the given number of microseconds.
+  // Timing is best effort.
+  void SleepMicro(uint32_t aMicroseconds);
+
+  // Suspends and samples threads on behalf of this sampler thread.
+  Sampler mSampler;
+
+  // Activity generation this thread was created for; used to detect when the
+  // sampler thread must stop.
+  const uint32_t mActivityGeneration;
+
+  // Time between samples, in microseconds.
+  const int mIntervalMicroseconds;
+
+  // OS-specific state: the handle of the sampler thread, plus a Windows-only
+  // timer-resolution flag.
+#if defined(GP_OS_windows)
+  HANDLE mThread;
+  bool mNoTimerResolutionChange = true;
+#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
+    defined(GP_OS_android) || defined(GP_OS_freebsd)
+  pthread_t mThread;
+#endif
+
+  // Not copyable.
+  SamplerThread(const SamplerThread&) = delete;
+  void operator=(const SamplerThread&) = delete;
+};
+
+// Heap-allocates a SamplerThread and returns it. This helper exists because
+// ActivePS's constructor needs to create a SamplerThread, but SamplerThread
+// is defined after ActivePS. It could probably be removed by moving some code
+// around.
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+                                       double aInterval, uint32_t aFeatures) {
+  SamplerThread* const samplerThread =
+      new SamplerThread(aLock, aGeneration, aInterval, aFeatures);
+  return samplerThread;
+}
+
+// This function is the sampler thread. This implementation is used for all
+// targets.
+void SamplerThread::Run() {
+ // TODO: If possible, name this thread later on, after NSPR becomes available.
+ // PR_SetCurrentThreadName("SamplerThread");
+
+ // Features won't change during this SamplerThread's lifetime, so we can read
+ // them once and store them locally.
+ const uint32_t features = []() -> uint32_t {
+ PSAutoLock lock;
+ if (!ActivePS::Exists(lock)) {
+ // If there is no active profiler, it doesn't matter what we return,
+ // because this thread will exit before any feature is used.
+ return 0;
+ }
+ return ActivePS::Features(lock);
+ }();
+
+ // Not *no*-stack-sampling means we do want stack sampling.
+ const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
+
+ // Use local ProfileBuffer to capture the stack.
+ // (This is to avoid touching the CorePS::CoreBuffer lock while
+ // a thread is suspended, because that thread could be working with
+ // the CorePS::CoreBuffer as well.)
+ ProfileBufferChunkManagerSingle localChunkManager(
+ ProfileBufferChunkManager::scExpectedMaximumStackSize);
+ ProfileChunkedBuffer localBuffer(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
+ ProfileBuffer localProfileBuffer(localBuffer);
+
+ // Will be kept between collections, to know what each collection does.
+ auto previousState = localBuffer.GetState();
+
+ // This will be positive if we are running behind schedule (sampling less
+ // frequently than desired) and negative if we are ahead of schedule.
+ TimeDuration lastSleepOvershoot = 0;
+ TimeStamp sampleStart = TimeStamp::Now();
+
+ while (true) {
+ // This scope is for |lock|. It ends before we sleep below.
+ {
+ PSAutoLock lock;
+ TimeStamp lockAcquired = TimeStamp::Now();
+
+ if (!ActivePS::Exists(lock)) {
+ return;
+ }
+
+ // At this point profiler_stop() might have been called, and
+ // profiler_start() might have been called on another thread. If this
+ // happens the generation won't match.
+ if (ActivePS::Generation(lock) != mActivityGeneration) {
+ return;
+ }
+
+ ActivePS::ClearExpiredExitProfiles(lock);
+
+ TimeStamp expiredMarkersCleaned = TimeStamp::Now();
+
+ if (int(gSkipSampling) <= 0 && !ActivePS::IsSamplingPaused(lock)) {
+ TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
+ ProfileBuffer& buffer = ActivePS::Buffer(lock);
+
+ // handle per-process generic counters
+ const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
+ for (auto& counter : counters) {
+ // create Buffer entries for each counter
+ buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
+ buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+ int64_t count;
+ uint64_t number;
+ counter->Sample(count, number);
+ buffer.AddEntry(ProfileBufferEntry::Count(count));
+ if (number) {
+ buffer.AddEntry(ProfileBufferEntry::Number(number));
+ }
+ }
+ TimeStamp countersSampled = TimeStamp::Now();
+
+ if (stackSampling) {
+ const Vector<LiveProfiledThreadData>& liveThreads =
+ ActivePS::LiveProfiledThreads(lock);
+
+ for (auto& thread : liveThreads) {
+ RegisteredThread* registeredThread = thread.mRegisteredThread;
+ ProfiledThreadData* profiledThreadData =
+ thread.mProfiledThreadData.get();
+ RefPtr<ThreadInfo> info = registeredThread->Info();
+
+ // If the thread is asleep and has been sampled before in the same
+ // sleep episode, find and copy the previous sample, as that's
+ // cheaper than taking a new sample.
+ if (registeredThread->RacyRegisteredThread()
+ .CanDuplicateLastSampleDueToSleep()) {
+ bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
+ info->ThreadId(), CorePS::ProcessStartTime(),
+ profiledThreadData->LastSample());
+ if (dup_ok) {
+ continue;
+ }
+ }
+
+ AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);
+
+ TimeStamp now = TimeStamp::Now();
+
+ // Record the global profiler buffer's range start now, before
+ // adding the first entry for this thread's sample.
+ const uint64_t bufferRangeStart = buffer.BufferRangeStart();
+
+ // Add the thread ID now, so we know its position in the main
+ // buffer, which is used by some JS data. (DoPeriodicSample only
+ // knows about the temporary local buffer.)
+ const uint64_t samplePos =
+ buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
+ profiledThreadData->LastSample() = Some(samplePos);
+
+ // Also add the time, so it's always there after the thread ID, as
+ // expected by the parser. (Other stack data is optional.)
+ TimeDuration delta = now - CorePS::ProcessStartTime();
+ buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+
+ mSampler.SuspendAndSampleAndResumeThread(
+ lock, *registeredThread, now,
+ [&](const Registers& aRegs, const TimeStamp& aNow) {
+ DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
+ aRegs, samplePos, bufferRangeStart,
+ localProfileBuffer);
+ });
+
+ // If data is complete, copy it into the global buffer.
+ auto state = localBuffer.GetState();
+ if (state.mClearedBlockCount != previousState.mClearedBlockCount) {
+ LOG("Stack sample too big for local storage, needed %u bytes",
+ unsigned(state.mRangeEnd - previousState.mRangeEnd));
+ } else if (state.mRangeEnd - previousState.mRangeEnd >=
+ *profiler_get_core_buffer().BufferLength()) {
+ LOG("Stack sample too big for profiler storage, needed %u bytes",
+ unsigned(state.mRangeEnd - previousState.mRangeEnd));
+ } else {
+ profiler_get_core_buffer().AppendContents(localBuffer);
+ }
+
+ // Clean up for the next run.
+ localBuffer.Clear();
+ previousState = localBuffer.GetState();
+ }
+ }
+
+#if defined(USE_LUL_STACKWALK)
+ // The LUL unwind object accumulates frame statistics. Periodically we
+ // should poke it to give it a chance to print those statistics. This
+ // involves doing I/O (fprintf, __android_log_print, etc.) and so
+ // can't safely be done from the critical section inside
+ // SuspendAndSampleAndResumeThread, which is why it is done here.
+ lul::LUL* lul = CorePS::Lul(lock);
+ if (lul) {
+ lul->MaybeShowStats();
+ }
+#endif
+ TimeStamp threadsSampled = TimeStamp::Now();
+
+ {
+ AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
+ ActivePS::FulfillChunkRequests(lock);
+ }
+
+ buffer.CollectOverheadStats(delta, lockAcquired - sampleStart,
+ expiredMarkersCleaned - lockAcquired,
+ countersSampled - expiredMarkersCleaned,
+ threadsSampled - countersSampled);
+ }
+ }
+ // gPSMutex is not held after this point.
+
+ // Calculate how long a sleep to request. After the sleep, measure how
+ // long we actually slept and take the difference into account when
+ // calculating the sleep interval for the next iteration. This is an
+ // attempt to keep "to schedule" in the presence of inaccuracy of the
+ // actual sleep intervals.
+ TimeStamp targetSleepEndTime =
+ sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
+ TimeStamp beforeSleep = TimeStamp::Now();
+ TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
+ double sleepTime = std::max(
+ 0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds());
+ SleepMicro(static_cast<uint32_t>(sleepTime));
+ sampleStart = TimeStamp::Now();
+ lastSleepOvershoot =
+ sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
+ }
+}
+
+// Temporary closing namespaces from enclosing platform.cpp.
+} // namespace baseprofiler
+} // namespace mozilla
+
+// We #include these files directly because it means those files can use
+// declarations from this file trivially. These provide target-specific
+// implementations of all SamplerThread methods except Run().
+#if defined(GP_OS_windows)
+# include "platform-win32.cpp"
+#elif defined(GP_OS_darwin)
+# include "platform-macos.cpp"
+#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+# include "platform-linux-android.cpp"
+#else
+# error "bad platform"
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Allocates a PlatformData for thread `aThreadId` and wraps it in a
+// UniquePlatformData.
+UniquePlatformData AllocPlatformData(BaseProfilerThreadId aThreadId) {
+  PlatformData* platformData = new PlatformData(aThreadId);
+  return UniquePlatformData(platformData);
+}
+
+// Deleter call operator: releases `aData` with `delete`.
+void PlatformDataDestructor::operator()(PlatformData* aData) {
+  delete aData;
+}
+
+// END SamplerThread
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN externally visible functions
+
+// Translates one feature name into its ProfilerFeature bit.
+// "default" expands to DefaultFeatures() (plus StartupExtraDefaultFeatures()
+// when aIsStartup is true), masked by AvailableFeatures(). An unknown name is
+// reported on the console and contributes no bits.
+static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
+  if (strcmp(aFeature, "default") == 0) {
+    uint32_t defaults = DefaultFeatures();
+    if (aIsStartup) {
+      defaults |= StartupExtraDefaultFeatures();
+    }
+    return defaults & AvailableFeatures();
+  }
+
+  // Expands to one name comparison per known feature.
+#define RETURN_IF_FEATURE_NAME(n_, str_, Name_, desc_) \
+  if (strcmp(aFeature, str_) == 0) {                   \
+    return ProfilerFeature::Name_;                     \
+  }
+
+  BASE_PROFILER_FOR_EACH_FEATURE(RETURN_IF_FEATURE_NAME)
+
+#undef RETURN_IF_FEATURE_NAME
+
+  // Nothing matched. We keep running rather than exit, because the name may
+  // belong to an old feature that is not implemented anymore.
+  PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature);
+  PrintUsage();
+  return 0;
+}
+
+// ORs together the feature bits of the `aFeatureCount` names stored in
+// `aFeatures`. `aIsStartup` is forwarded to ParseFeature() for each name.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                      uint32_t aFeatureCount,
+                                      bool aIsStartup /* = false */) {
+  uint32_t combined = 0;
+  const char** const end = aFeatures + aFeatureCount;
+  for (const char** name = aFeatures; name != end; ++name) {
+    combined |= ParseFeature(*name, aIsStartup);
+  }
+  return combined;
+}
+
+// Scans CorePS's list of registered threads for the entry whose thread id
+// equals the calling thread's id. Returns nullptr when there is no such
+// entry. This should only be called in places where TLSRegisteredThread
+// can't be used.
+static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) {
+  BaseProfilerThreadId currentId = profiler_current_thread_id();
+  for (const UniquePtr<RegisteredThread>& candidate :
+       CorePS::RegisteredThreads(aLock)) {
+    if (candidate->Info()->ThreadId() == currentId) {
+      return candidate.get();
+    }
+  }
+  return nullptr;
+}
+
+// Creates the RegisteredThread record for the calling thread, publishes it
+// through TLSRegisteredThread and CorePS, and marks it as profiled right away
+// when a session is active and ActivePS::ShouldProfileThread() selects it.
+// Returns the new record's ProfilingStack, or nullptr if
+// TLSRegisteredThread::Init() fails.
+static ProfilingStack* locked_register_thread(PSLockRef aLock,
+                                              const char* aName,
+                                              void* aStackTop) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // The calling thread must not have a registration yet.
+  MOZ_ASSERT(!FindCurrentThreadRegisteredThread(aLock));
+
+  VTUNE_REGISTER_THREAD(aName);
+
+  if (!TLSRegisteredThread::Init(aLock)) {
+    return nullptr;
+  }
+
+  RefPtr<ThreadInfo> threadInfo = new ThreadInfo(
+      aName, profiler_current_thread_id(), profiler_is_main_thread());
+  UniquePtr<RegisteredThread> newThread =
+      MakeUnique<RegisteredThread>(threadInfo, aStackTop);
+  RegisteredThread* const rawThread = newThread.get();
+
+  TLSRegisteredThread::SetRegisteredThread(aLock, rawThread);
+
+  const bool profileNow = ActivePS::Exists(aLock) &&
+                          ActivePS::ShouldProfileThread(aLock, threadInfo);
+  if (profileNow) {
+    rawThread->RacyRegisteredThread().SetIsBeingProfiled(true);
+    ActivePS::AddLiveProfiledThread(
+        aLock, rawThread, MakeUnique<ProfiledThreadData>(threadInfo));
+  }
+
+  // Take the stack's address before the record is moved into CorePS.
+  ProfilingStack* const stack =
+      &rawThread->RacyRegisteredThread().ProfilingStack();
+
+  CorePS::AppendRegisteredThread(aLock, std::move(newThread));
+
+  return stack;
+}
+
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+ double aInterval, uint32_t aFeatures,
+ const char** aFilters, uint32_t aFilterCount,
+ const Maybe<double>& aDuration);
+
+// Splits `aString` at commas. The string is copied into `aStorage`, the
+// commas in that copy are overwritten with null characters, and the returned
+// vector holds pointers to the non-empty pieces inside `aStorage` -- so
+// `aStorage` has to stay alive for as long as the result is used.
+static Vector<const char*> SplitAtCommas(const char* aString,
+                                         UniquePtr<char[]>& aStorage) {
+  const size_t length = strlen(aString);
+  aStorage = MakeUnique<char[]>(length + 1);
+  PodCopy(aStorage.get(), aString, length + 1);
+
+  Vector<const char*> pieces;
+  size_t pieceStart = 0;
+  // The terminating null (pos == length) is visited too, so that the last
+  // piece gets flushed.
+  for (size_t pos = 0; pos <= length; ++pos) {
+    char& ch = aStorage[pos];
+    if (ch != ',' && ch != '\0') {
+      continue;
+    }
+    ch = '\0';
+    // Empty pieces are dropped, otherwise ParseFeatures would later complain
+    // about unrecognized features.
+    if (pos > pieceStart) {
+      MOZ_RELEASE_ASSERT(pieces.append(&aStorage[pieceStart]));
+    }
+    pieceStart = pos + 1;
+  }
+  return pieces;
+}
+
+// Returns a pointer to the first character of `str` that is not a decimal
+// digit, i.e. to the unit suffix of strings such as "128MiB". For an
+// all-digit string the result points at the terminating null.
+static const char* get_size_suffix(const char* str) {
+  const char* ptr = str;
+
+  // isdigit() requires a value representable as unsigned char (or EOF);
+  // passing a plain char that holds a negative value (e.g. a non-ASCII byte)
+  // is undefined behavior, hence the cast.
+  while (isdigit(static_cast<unsigned char>(*ptr))) {
+    ptr++;
+  }
+
+  return ptr;
+}
+
+// Initializes the Base Profiler: creates CorePS, registers the calling thread
+// under kMainThreadName (with `aStackTop` as its stack top) and runs the
+// platform-specific initialization. When MOZ_PROFILER_STARTUP is set to
+// something other than "", "0", "N" or "n" (and MOZ_PROFILER_STARTUP_NO_BASE
+// is unset), profiling is started with settings read from the
+// MOZ_PROFILER_STARTUP_* environment variables. Invalid values in those
+// variables print the usage text and terminate the process with exit(1).
+void profiler_init(void* aStackTop) {
+  LOG("profiler_init");
+
+  profiler_init_main_thread_id();
+
+  VTUNE_INIT();
+
+  MOZ_RELEASE_ASSERT(!CorePS::Exists());
+
+  if (getenv("MOZ_BASE_PROFILER_HELP")) {
+    PrintUsage();
+    exit(0);
+  }
+
+  SharedLibraryInfo::Initialize();
+
+  uint32_t features = DefaultFeatures() & AvailableFeatures();
+
+  // Backing storage for the strings referenced by `filters` once
+  // MOZ_PROFILER_STARTUP_FILTERS has been split.
+  UniquePtr<char[]> filterStorage;
+
+  Vector<const char*> filters;
+  MOZ_RELEASE_ASSERT(filters.append(kMainThreadName));
+
+  PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES;
+  Maybe<double> duration = Nothing();
+  double interval = BASE_PROFILER_DEFAULT_INTERVAL;
+
+  {
+    PSAutoLock lock;
+
+    // We've passed the possible failure point. Instantiate CorePS, which
+    // indicates that the profiler has initialized successfully.
+    CorePS::Create(lock);
+
+    Unused << locked_register_thread(lock, kMainThreadName, aStackTop);
+
+    // Platform-specific initialization.
+    PlatformInit(lock);
+
+    // (Linux-only) We could create CorePS::mLul and read unwind info into it
+    // at this point. That would match the lifetime implied by destruction of
+    // it in profiler_shutdown() just below. However, that gives a big delay on
+    // startup, even if no profiling is actually to be done. So, instead, it is
+    // created on demand at the first call to PlatformStart().
+
+    const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
+    if (!startupEnv || startupEnv[0] == '\0' ||
+        ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
+          startupEnv[0] == 'n') &&
+         startupEnv[1] == '\0')) {
+      return;
+    }
+
+    // Hidden option to stop Base Profiler, mostly due to Talos intermittents,
+    // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3
+    // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
+    if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
+      return;
+    }
+
+    LOG("- MOZ_PROFILER_STARTUP is set");
+
+    // Startup default capacity may be different.
+    capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES;
+
+    const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
+    if (startupCapacity && startupCapacity[0] != '\0') {
+      errno = 0;
+      const long capacityLong = strtol(startupCapacity, nullptr, 10);
+      std::string_view sizeSuffix = get_size_suffix(startupCapacity);
+
+      // Number of buffer entries that one unit of the given suffix stands
+      // for; a bare number is already a number of entries.
+      uint64_t entriesPerUnit = 1;
+      if (sizeSuffix == "KB") {
+        entriesPerUnit = 1000 / scBytesPerEntry;
+      } else if (sizeSuffix == "KiB") {
+        entriesPerUnit = 1024 / scBytesPerEntry;
+      } else if (sizeSuffix == "MB") {
+        entriesPerUnit = (1000 * 1000) / scBytesPerEntry;
+      } else if (sizeSuffix == "MiB") {
+        entriesPerUnit = (1024 * 1024) / scBytesPerEntry;
+      } else if (sizeSuffix == "GB") {
+        entriesPerUnit = (1000 * 1000 * 1000) / scBytesPerEntry;
+      } else if (sizeSuffix == "GiB") {
+        entriesPerUnit = (1024 * 1024 * 1024) / scBytesPerEntry;
+      } else if (!sizeSuffix.empty()) {
+        PrintToConsole(
+            "- MOZ_PROFILER_STARTUP_ENTRIES unit must be one of the "
+            "following: KB, KiB, MB, MiB, GB, GiB");
+        PrintUsage();
+        exit(1);
+      }
+
+      // `long` could be 32 or 64 bits, so the scaling and the comparison
+      // with the maximum 32-bit signed number are done in 64 bits. The
+      // operand is bounded *before* multiplying, so the product can neither
+      // overflow nor wrap around into a value that looks valid.
+      const uint64_t maxEntries = static_cast<uint64_t>(INT32_MAX);
+      uint64_t entryCount = 0;
+      bool capacityIsValid =
+          errno == 0 && capacityLong > 0 && entriesPerUnit > 0;
+      if (capacityIsValid) {
+        const uint64_t unitCount = static_cast<uint64_t>(capacityLong);
+        capacityIsValid = unitCount <= maxEntries / entriesPerUnit;
+        if (capacityIsValid) {
+          entryCount = unitCount * entriesPerUnit;
+        }
+      }
+
+      if (capacityIsValid) {
+        capacity = PowerOfTwo32(
+            ClampToAllowedEntries(static_cast<uint32_t>(entryCount)));
+        LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
+      } else {
+        PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
+                       startupCapacity);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
+    if (startupDuration && startupDuration[0] != '\0') {
+      // The duration is a floating point number. Use StringToDouble rather than
+      // strtod, so that "." is used as the decimal separator regardless of OS
+      // locale.
+      auto durationVal = StringToDouble(std::string(startupDuration));
+      if (durationVal && *durationVal >= 0.0) {
+        // A duration of exactly 0 is accepted but leaves `duration` unset.
+        if (*durationVal > 0.0) {
+          duration = Some(*durationVal);
+        }
+        LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal);
+      } else {
+        PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
+                       startupDuration);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
+    if (startupInterval && startupInterval[0] != '\0') {
+      // The interval is a floating point number. Use StringToDouble rather than
+      // strtod, so that "." is used as the decimal separator regardless of OS
+      // locale.
+      auto intervalValue = StringToDouble(MakeStringSpan(startupInterval));
+      if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) {
+        interval = *intervalValue;
+        LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
+      } else {
+        PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
+                       startupInterval);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    features |= StartupExtraDefaultFeatures() & AvailableFeatures();
+
+    // An explicit bitfield takes precedence over the list of feature names.
+    const char* startupFeaturesBitfield =
+        getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
+    if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
+      errno = 0;
+      features = strtol(startupFeaturesBitfield, nullptr, 10);
+      if (errno == 0) {
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
+      } else {
+        PrintToConsole(
+            "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
+            startupFeaturesBitfield);
+        PrintUsage();
+        exit(1);
+      }
+    } else {
+      const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
+      if (startupFeatures) {
+        // Interpret startupFeatures as a list of feature strings, separated by
+        // commas.
+        UniquePtr<char[]> featureStringStorage;
+        Vector<const char*> featureStringArray =
+            SplitAtCommas(startupFeatures, featureStringStorage);
+        features = ParseFeaturesFromStringArray(featureStringArray.begin(),
+                                                featureStringArray.length(),
+                                                /* aIsStartup */ true);
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
+      }
+    }
+
+    const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
+    if (startupFilters && startupFilters[0] != '\0') {
+      filters = SplitAtCommas(startupFilters, filterStorage);
+      LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
+
+      if (mozilla::profiler::detail::FiltersExcludePid(filters)) {
+        LOG(" -> This process is excluded and won't be profiled");
+        return;
+      }
+    }
+
+    locked_profiler_start(lock, capacity, interval, features, filters.begin(),
+                          filters.length(), duration);
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+}
+
+static void locked_profiler_save_profile_to_file(PSLockRef aLock,
+ const char* aFilename,
+ bool aIsShuttingDown);
+
+static SamplerThread* locked_profiler_stop(PSLockRef aLock);
+
+// Shuts the profiler down. If a session is active it is stopped first, after
+// saving the profile to the file named by MOZ_PROFILER_SHUTDOWN when that
+// variable is set and non-empty. Then CorePS is destroyed. Asserts that it
+// runs on the main thread and that CorePS exists.
+void profiler_shutdown() {
+  LOG("profiler_shutdown");
+
+  VTUNE_SHUTDOWN();
+
+  MOZ_RELEASE_ASSERT(profiler_is_main_thread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // If the profiler is active we must get a handle to the SamplerThread before
+  // ActivePS is destroyed, in order to delete it.
+  SamplerThread* stoppedSampler = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (ActivePS::Exists(lock)) {
+      // Save the profile on shutdown if requested.
+      const char* shutdownFile = getenv("MOZ_PROFILER_SHUTDOWN");
+      const bool saveRequested = shutdownFile && shutdownFile[0] != '\0';
+      if (saveRequested) {
+        locked_profiler_save_profile_to_file(lock, shutdownFile,
+                                             /* aIsShuttingDown */ true);
+      }
+
+      stoppedSampler = locked_profiler_stop(lock);
+    }
+
+    CorePS::Destroy(lock);
+
+    // CorePS and the ThreadInfos it contains are gone now, so this thread's
+    // TLSRegisteredThread can be cleared.
+    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
+  }
+
+  // Done with gPSMutex unlocked; the comments in profiler_stop() explain why.
+  // Deleting a null pointer is a no-op, so no check is needed.
+  delete stoppedSampler;
+}
+
+// Streams this process's profile into `aWriter`. With `aOnlyThreads` the
+// output is a bare list; otherwise it is a complete document that ends with
+// an empty "processes" array. Returns false (leaving the output unfinished)
+// when profiler_stream_json_for_this_process() fails.
+static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
+                                     double aSinceTime, bool aIsShuttingDown,
+                                     bool aOnlyThreads = false) {
+  LOG("WriteProfileToJSONWriter");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (aOnlyThreads) {
+    aWriter.StartBareList();
+    if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
+                                               aIsShuttingDown, aOnlyThreads)) {
+      return false;
+    }
+    aWriter.EndBareList();
+    return true;
+  }
+
+  aWriter.Start();
+  if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
+                                             aIsShuttingDown, aOnlyThreads)) {
+    return false;
+  }
+
+  // Profiles from other processes are not included because this is a
+  // synchronous function.
+  aWriter.StartArrayProperty("processes");
+  aWriter.EndArray();
+
+  aWriter.End();
+  return true;
+}
+
+// Stores `aProcessName` in CorePS and, when `aETLDplus1` is non-null, the
+// eTLD+1 string it points to as well.
+void profiler_set_process_name(const std::string& aProcessName,
+                               const std::string* aETLDplus1) {
+  const char* const etldForLog = aETLDplus1 ? aETLDplus1->c_str() : "<none>";
+  LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.c_str(),
+      etldForLog);
+
+  PSAutoLock lock;
+  CorePS::SetProcessName(lock, aProcessName);
+  if (!aETLDplus1) {
+    return;
+  }
+  CorePS::SetETLDplus1(lock, *aETLDplus1);
+}
+
+// Serializes this process's profile with WriteProfileToJSONWriter() and
+// returns a copy of the written data, or nullptr if writing failed.
+UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown,
+                                       bool aOnlyThreads) {
+  LOG("profiler_get_profile");
+
+  SpliceableChunkedJSONWriter writer{FailureLatchInfallibleSource::Singleton()};
+  const bool written = WriteProfileToJSONWriter(writer, aSinceTime,
+                                                aIsShuttingDown, aOnlyThreads);
+  if (!written) {
+    return nullptr;
+  }
+  return writer.ChunkedWriteFunc().CopyData();
+}
+
+// Copies the settings of the active session into the output parameters. If
+// any output pointer is null nothing is written. Without an active session
+// the outputs are reset (0 / Nothing() / empty). The strings placed in
+// `aFilters` are the c_str() pointers of ActivePS's own filter strings.
+void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
+                               double* aInterval, uint32_t* aFeatures,
+                               Vector<const char*>* aFilters) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const bool allOutputsProvided =
+      aCapacity && aDuration && aInterval && aFeatures && aFilters;
+  if (!allOutputsProvided) {
+    return;
+  }
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    *aCapacity = ActivePS::Capacity(lock).Value();
+    *aDuration = ActivePS::Duration(lock);
+    *aInterval = ActivePS::Interval(lock);
+    *aFeatures = ActivePS::Features(lock);
+
+    const Vector<std::string>& activeFilters = ActivePS::Filters(lock);
+    const size_t filterCount = activeFilters.length();
+    MOZ_ALWAYS_TRUE(aFilters->resize(filterCount));
+    for (size_t index = 0; index < filterCount; ++index) {
+      (*aFilters)[index] = activeFilters[index].c_str();
+    }
+    return;
+  }
+
+  // The profiler is not running.
+  *aCapacity = 0;
+  *aDuration = Nothing();
+  *aInterval = 0;
+  *aFeatures = 0;
+  aFilters->clear();
+}
+
+// Reports, through `aSetEnv(key, value)`, the MOZ_PROFILER_STARTUP*
+// environment variables a child process needs in order to start profiling
+// with the same settings as the currently active session. When no session is
+// active, only MOZ_PROFILER_STARTUP is reported, with an empty value.
+void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    aSetEnv("MOZ_PROFILER_STARTUP", "");
+    return;
+  }
+
+  aSetEnv("MOZ_PROFILER_STARTUP", "1");
+  auto capacityString =
+      Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
+  aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
+
+  // The interval must be written with "." as the decimal separator, because
+  // the receiving side parses it independently of the OS locale.
+  // std::to_string(double) formats like printf("%f"), whose separator follows
+  // the current C locale, so the separator is normalized afterwards: "%f"
+  // output is an optional '-', the integer digits, the separator, and exactly
+  // six fraction digits. Strings that don't have that shape (e.g. "inf",
+  // "nan") are left untouched.
+  std::string intervalString = std::to_string(ActivePS::Interval(lock));
+  {
+    constexpr size_t kFractionDigits = 6;
+    size_t integerEnd = 0;
+    while (integerEnd < intervalString.size() &&
+           (intervalString[integerEnd] == '-' ||
+            (intervalString[integerEnd] >= '0' &&
+             intervalString[integerEnd] <= '9'))) {
+      ++integerEnd;
+    }
+    if (integerEnd > 0 &&
+        intervalString.size() > integerEnd + kFractionDigits) {
+      intervalString =
+          intervalString.substr(0, integerEnd) + "." +
+          intervalString.substr(intervalString.size() - kFractionDigits);
+    }
+  }
+  aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str());
+
+  auto featuresString = Smprintf("%d", ActivePS::Features(lock));
+  aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
+
+  // Join the filters with commas (no trailing comma).
+  std::string filtersString;
+  const Vector<std::string>& filters = ActivePS::Filters(lock);
+  for (uint32_t i = 0; i < filters.length(); ++i) {
+    filtersString += filters[i];
+    if (i != filters.length() - 1) {
+      filtersString += ",";
+    }
+  }
+  aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
+}
+
+// Hands `aExitProfile` to the active session; does nothing when the profiler
+// is not running.
+void profiler_received_exit_profile(const std::string& aExitProfile) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    ActivePS::AddExitProfile(lock, aExitProfile);
+  }
+}
+
+// Returns the result of ActivePS::MoveExitProfiles(), or an empty vector when
+// the profiler is not running.
+Vector<std::string> profiler_move_exit_profiles() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (!ActivePS::Exists(lock)) {
+    return Vector<std::string>();
+  }
+  return ActivePS::MoveExitProfiles(lock);
+}
+
+// Writes the profile of the active session to the file `aFilename`: this
+// process's data followed by a "processes" array made of the non-empty exit
+// profiles taken from ActivePS. Nothing is written if the file cannot be
+// opened.
+static void locked_profiler_save_profile_to_file(PSLockRef aLock,
+                                                 const char* aFilename,
+                                                 bool aIsShuttingDown = false) {
+  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  std::ofstream stream;
+  stream.open(aFilename);
+  if (!stream.is_open()) {
+    return;
+  }
+
+  OStreamJSONWriteFunc writeFunc(stream);
+  SpliceableJSONWriter writer(writeFunc,
+                              FailureLatchInfallibleSource::Singleton());
+  writer.Start();
+
+  locked_profiler_stream_json_for_this_process(aLock, writer,
+                                               /* sinceTime */ 0,
+                                               aIsShuttingDown);
+
+  writer.StartArrayProperty("processes");
+  Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock);
+  for (auto& exitProfile : exitProfiles) {
+    if (exitProfile.empty()) {
+      continue;
+    }
+    writer.Splice(exitProfile);
+  }
+  writer.EndArray();
+
+  writer.End();
+
+  stream.close();
+}
+
+// Saves the current profile to `aFilename` if the profiler is running;
+// otherwise does nothing.
+void baseprofiler_save_profile_to_file(const char* aFilename) {
+  LOG("baseprofiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    locked_profiler_save_profile_to_file(lock, aFilename);
+  }
+}
+
+// Returns AvailableFeatures(); asserts that the profiler was initialized.
+uint32_t profiler_get_available_features() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  const uint32_t available = AvailableFeatures();
+  return available;
+}
+
+// Returns the buffer information of the active session, or Nothing() when
+// the profiler is not running.
+Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
+  }
+
+  return Nothing();
+}
+
+// Label "enter" hook; basically a copy of AutoProfilerLabel's constructor.
+// Pushes a label frame (category OTHER) onto the ProfilingStack obtained from
+// AutoProfilerLabel::sProfilingStack and returns that stack, or returns
+// nullptr when there is none.
+static void* MozGlueBaseLabelEnter(const char* aLabel,
+                                   const char* aDynamicString, void* aSp) {
+  ProfilingStack* const stack = AutoProfilerLabel::sProfilingStack.get();
+  if (!stack) {
+    return nullptr;
+  }
+  stack->pushLabelFrame(aLabel, aDynamicString, aSp,
+                        ProfilingCategoryPair::OTHER);
+  return stack;
+}
+
+// Label "exit" hook; basically a copy of AutoProfilerLabel's destructor.
+// Pops one frame from the ProfilingStack passed as `sProfilingStack`; a null
+// pointer is ignored.
+static void MozGlueBaseLabelExit(void* sProfilingStack) {
+  if (!sProfilingStack) {
+    return;
+  }
+  auto* stack = reinterpret_cast<ProfilingStack*>(sProfilingStack);
+  stack->pop();
+}
+
+// Starts a profiling session. Requires that CorePS exists and that no session
+// is active. A capacity too small for one full stack, a non-positive
+// interval, or a non-positive duration is replaced by the default capacity,
+// the default interval, or "no duration" respectively before ActivePS is
+// created. Every already registered thread selected by
+// ActivePS::ShouldProfileThread() is then marked as profiled.
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+                                  double aInterval, uint32_t aFeatures,
+                                  const char** aFilters, uint32_t aFilterCount,
+                                  const Maybe<double>& aDuration) {
+  const TimeStamp sessionStart = TimeStamp::Now();
+
+  if (LOG_TEST) {
+    LOG("locked_profiler_start");
+    LOG("- capacity = %d", int(aCapacity.Value()));
+    LOG("- duration = %.2f", aDuration ? *aDuration : -1);
+    LOG("- interval = %.2f", aInterval);
+
+#define LOG_ENABLED_FEATURE(n_, str_, Name_, desc_) \
+  if (ProfilerFeature::Has##Name_(aFeatures)) {     \
+    LOG("- feature = %s", str_);                    \
+  }
+
+    BASE_PROFILER_FOR_EACH_FEATURE(LOG_ENABLED_FEATURE)
+
+#undef LOG_ENABLED_FEATURE
+
+    const char** const filtersEnd = aFilters + aFilterCount;
+    for (const char** filter = aFilters; filter != filtersEnd; ++filter) {
+      LOG("- threads = %s", *filter);
+    }
+  }
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
+
+  mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker();
+
+#if defined(GP_PLAT_amd64_windows) || defined(GP_PLAT_arm64_windows)
+  mozilla::WindowsStackWalkInitialization();
+#endif
+
+  // Unreasonable inputs fall back to the defaults. The buffer must be able to
+  // hold at least one full stack.
+  // TODO: Review magic numbers.
+  const bool capacityFitsOneStack =
+      aCapacity.Value() >=
+      ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry;
+  PowerOfTwo32 capacity =
+      capacityFitsOneStack ? aCapacity : BASE_PROFILER_DEFAULT_ENTRIES;
+
+  Maybe<double> duration = aDuration;
+  if (duration && *duration <= 0) {
+    duration = Nothing();
+  }
+
+  double interval = BASE_PROFILER_DEFAULT_INTERVAL;
+  if (aInterval > 0) {
+    interval = aInterval;
+  }
+
+  ActivePS::Create(aLock, sessionStart, capacity, interval, aFeatures,
+                   aFilters, aFilterCount, duration);
+
+  // Set up profiling for each registered thread, if appropriate.
+  for (const UniquePtr<RegisteredThread>& candidate :
+       CorePS::RegisteredThreads(aLock)) {
+    RefPtr<ThreadInfo> threadInfo = candidate->Info();
+    if (!ActivePS::ShouldProfileThread(aLock, threadInfo)) {
+      continue;
+    }
+    candidate->RacyRegisteredThread().SetIsBeingProfiled(true);
+    ActivePS::AddLiveProfiledThread(
+        aLock, candidate.get(), MakeUnique<ProfiledThreadData>(threadInfo));
+    candidate->RacyRegisteredThread().ReinitializeOnResume();
+  }
+
+  // Setup support for pushing/popping labels in mozglue.
+  RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit);
+
+  // RacyFeatures is set up last, once everything else is in place.
+  RacyFeatures::SetActive(ActivePS::Features(aLock));
+}
+
+// Starts the profiler with the given settings, initializing the profiler
+// first if that has not happened yet. A session that is already running is
+// stopped and replaced by the new one.
+void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+                    uint32_t aFeatures, const char** aFilters,
+                    uint32_t aFilterCount, const Maybe<double>& aDuration) {
+  LOG("profiler_start");
+
+  // Initialize if necessary. This is done before `lock` is taken below,
+  // because profiler_init() acquires its own PSAutoLock and must therefore
+  // not be called while this function already holds one. (If profiler_init()
+  // starts a session itself, it is reset below like any other running
+  // session.)
+  if (!CorePS::Exists()) {
+    profiler_init(nullptr);
+  }
+
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Reset the current state if the profiler is running.
+    if (ActivePS::Exists(lock)) {
+      samplerThread = locked_profiler_stop(lock);
+    }
+
+    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                          aFilterCount, aDuration);
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why.
+  if (samplerThread) {
+    delete samplerThread;
+  }
+}
+
+// Makes sure the profiler is running with exactly the given settings,
+// initializing the profiler first if that has not happened yet. A session
+// that already uses these settings is left untouched; a session with
+// different settings is stopped and restarted; a stopped profiler is started.
+void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
+                             uint32_t aFeatures, const char** aFilters,
+                             uint32_t aFilterCount,
+                             const Maybe<double>& aDuration) {
+  LOG("profiler_ensure_started");
+
+  // Initialize if necessary. This is done before `lock` is taken below,
+  // because profiler_init() acquires its own PSAutoLock and must therefore
+  // not be called while this function already holds one.
+  if (!CorePS::Exists()) {
+    profiler_init(nullptr);
+  }
+
+  // bool startedProfiler = false; (See TODO below)
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (ActivePS::Exists(lock)) {
+      // The profiler is active.
+      if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
+                            aFilters, aFilterCount)) {
+        // Stop and restart with different settings.
+        samplerThread = locked_profiler_stop(lock);
+        locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                              aFilterCount, aDuration);
+        // startedProfiler = true; (See TODO below)
+      }
+    } else {
+      // The profiler is stopped.
+      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                            aFilterCount, aDuration);
+      // startedProfiler = true; (See TODO below)
+    }
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why.
+  if (samplerThread) {
+    delete samplerThread;
+  }
+}
+
+// Stops the active session: clears RacyFeatures, unhooks the mozglue label
+// callbacks, unmarks every live profiled thread and destroys ActivePS.
+// Returns the session's SamplerThread, which the caller has to delete.
+[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
+  LOG("locked_profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  // RacyFeatures is cleared first, before anything else is torn down.
+  RacyFeatures::SetInactive();
+
+  // TODO: Uninstall memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   mozilla::profiler::install_memory_counter(false);
+  // #endif
+
+  // Remove support for pushing/popping labels in mozglue.
+  RegisterProfilerLabelEnterExit(nullptr, nullptr);
+
+  // Stop sampling live threads.
+  for (const LiveProfiledThreadData& live :
+       ActivePS::LiveProfiledThreads(aLock)) {
+    live.mRegisteredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
+  }
+
+  // Stop() does not actually stop Run(); that happens in this function's
+  // caller when the sampler thread is destroyed. Stop() just gives the
+  // SamplerThread a chance to do some cleanup with gPSMutex locked.
+  SamplerThread* const sampler = ActivePS::Destroy(aLock);
+  sampler->Stop(aLock);
+
+  mozilla::base_profiler_markers_detail::ReleaseBufferForMainThreadAddMarker();
+
+  return sampler;
+}
+
+// Stops the profiler if it is running; otherwise does nothing.
+void profiler_stop() {
+  LOG("profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  SamplerThread* stoppedSampler = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (ActivePS::Exists(lock)) {
+      stoppedSampler = locked_profiler_stop(lock);
+    }
+  }
+
+  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
+  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
+  // return so the join operation within the destructor can complete, but Run()
+  // needs to lock gPSMutex to return.
+  //
+  // Because this call occurs with gPSMutex unlocked, it -- including the final
+  // iteration of Run()'s loop -- must be able to detect deactivation and
+  // return in a way that's safe with respect to other gPSMutex-locking
+  // operations that may have occurred in the meantime.
+  //
+  // When the profiler was not running, this deletes a null pointer, which is
+  // a no-op.
+  delete stoppedSampler;
+}
+
+// Returns true only if the profiler is running and ActivePS reports it as
+// paused.
+bool profiler_is_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  return ActivePS::Exists(lock) && ActivePS::IsPaused(lock);
+}
+
+// Pauses the running profiler and records a Pause entry, stamped with
+// profiler_time(), in the buffer. Does nothing when the profiler is not
+// running.
+void profiler_pause() {
+  LOG("profiler_pause");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    RacyFeatures::SetPaused();
+    ActivePS::SetIsPaused(lock, true);
+    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
+  }
+}
+
+// Resumes the running profiler: records a Resume entry, stamped with
+// profiler_time(), in the buffer and then clears the paused state. Does
+// nothing when the profiler is not running.
+void profiler_resume() {
+  LOG("profiler_resume");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    ActivePS::Buffer(lock).AddEntry(
+        ProfileBufferEntry::Resume(profiler_time()));
+    ActivePS::SetIsPaused(lock, false);
+    RacyFeatures::SetUnpaused();
+  }
+}
+
+// Returns true only if the profiler is running and ActivePS reports sampling
+// as paused.
+bool profiler_is_sampling_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  return ActivePS::Exists(lock) && ActivePS::IsSamplingPaused(lock);
+}
+
+// Pauses sampling in the running profiler and records a PauseSampling entry,
+// stamped with profiler_time(), in the buffer. Does nothing when the profiler
+// is not running.
+void profiler_pause_sampling() {
+  LOG("profiler_pause_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    RacyFeatures::SetSamplingPaused();
+    ActivePS::SetIsSamplingPaused(lock, true);
+    ActivePS::Buffer(lock).AddEntry(
+        ProfileBufferEntry::PauseSampling(profiler_time()));
+  }
+}
+
+// Resumes sampling in the running profiler: records a ResumeSampling entry,
+// stamped with profiler_time(), in the buffer and then clears the
+// sampling-paused state. Does nothing when the profiler is not running.
+void profiler_resume_sampling() {
+  LOG("profiler_resume_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    ActivePS::Buffer(lock).AddEntry(
+        ProfileBufferEntry::ResumeSampling(profiler_time()));
+    ActivePS::SetIsSamplingPaused(lock, false);
+    RacyFeatures::SetSamplingUnpaused();
+  }
+}
+
+// Returns RacyFeatures::IsActiveWithFeature(aFeature). Runs both on and off
+// the main thread.
+bool profiler_feature_active(uint32_t aFeature) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Hot path: RacyFeatures is consulted instead of ActivePS.
+  const bool active = RacyFeatures::IsActiveWithFeature(aFeature);
+  return active;
+}
+
+// Returns RacyFeatures::IsActiveWithoutFeature(aFeature). Runs both on and
+// off the main thread.
+bool profiler_active_without_feature(uint32_t aFeature) {
+  // Hot path: RacyFeatures is consulted instead of ActivePS.
+  const bool activeWithout = RacyFeatures::IsActiveWithoutFeature(aFeature);
+  return activeWithout;
+}
+
+// Appends `aCounter` to CorePS's list of counters.
+void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
+  DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
+
+  PSAutoLock psLock;
+  CorePS::AppendCounter(psLock, aCounter);
+}
+
+// Removes `aCounter` from CorePS's list of counters.
+void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
+  DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
+
+  PSAutoLock psLock;
+  // No final sample is enforced here, though that could be done if the
+  // profiler was active.
+  CorePS::RemoveCounter(psLock, aCounter);
+}
+
+// Registers the calling thread under `aName` and returns its ProfilingStack.
+// If the thread is already registered, the existing registration is kept (its
+// name is not changed), a text marker describing the attempt is recorded, and
+// the existing ProfilingStack is returned.
+ProfilingStack* profiler_register_thread(const char* aName,
+                                         void* aGuessStackTop) {
+  DEBUG_LOG("profiler_register_thread(%s)", aName);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  RegisteredThread* existing = FindCurrentThreadRegisteredThread(lock);
+  if (!existing) {
+    void* stackTop = GetStackTop(aGuessStackTop);
+    return locked_register_thread(lock, aName, stackTop);
+  }
+
+  LOG("profiler_register_thread(%s) - thread %" PRIu64
+      " already registered as %s",
+      aName, uint64_t(profiler_current_thread_id().ToNumber()),
+      existing->Info()->Name());
+  // TODO: Use new name. This is currently not possible because the
+  // RegisteredThread's ThreadInfo cannot be changed.
+  // In the meantime, we record a marker that could be used in the frontend.
+  std::string text =
+      std::string("Thread ") +
+      std::to_string(profiler_current_thread_id().ToNumber()) + " \"" +
+      existing->Info()->Name() + "\" attempted to re-register as \"" + aName +
+      "\"";
+  BASE_PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling,
+                            MarkerThreadId::MainThread(), text);
+
+  return &existing->RacyRegisteredThread().ProfilingStack();
+}
+
+// Unregisters the calling thread from the profiler. Tolerates being called
+// after CorePS is gone, and being called for a thread that has no current
+// registration (a marker is then sent to the main thread instead).
+void profiler_unregister_thread() {
+  if (!CorePS::Exists()) {
+    // This function can be called after the main thread has already shut down.
+    return;
+  }
+
+  PSAutoLock lock;
+
+  RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock);
+  MOZ_RELEASE_ASSERT(registeredThread ==
+                     TLSRegisteredThread::RegisteredThread(lock));
+
+  if (!registeredThread) {
+    LOG("profiler_unregister_thread() - thread %" PRIu64
+        " already unregistered",
+        uint64_t(profiler_current_thread_id().ToNumber()));
+    // A marker cannot be recorded on this thread since it is already
+    // unregistered. Route it to the main thread instead (unless this *is*
+    // the main thread, which has itself been unregistered); this may help
+    // catch mismatched register/unregister pairs in Firefox.
+    BaseProfilerThreadId tid = profiler_current_thread_id();
+    if (tid != profiler_main_thread_id()) {
+      BASE_PROFILER_MARKER_TEXT(
+          "profiler_unregister_thread again", OTHER_Profiling,
+          MarkerThreadId::MainThread(),
+          std::to_string(profiler_current_thread_id().ToNumber()));
+    }
+    // FindCurrentThreadRegisteredThread() can come back empty in two ways:
+    //
+    // - TLSRegisteredThread::Init() failed in locked_register_thread().
+    //
+    // - profiler_unregister_thread() was already called for this thread.
+    //   (Whether or not it should, this does happen in practice.)
+    //
+    // In both cases TLSRegisteredThread should be empty.
+    MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock));
+    return;
+  }
+
+  RefPtr<ThreadInfo> info = registeredThread->Info();
+
+  DEBUG_LOG("profiler_unregister_thread: %s", info->Name());
+
+  if (ActivePS::Exists(lock)) {
+    ActivePS::UnregisterThread(lock, registeredThread);
+  }
+
+  // Drop the TLS pointer to the RegisteredThread before the object goes away.
+  TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
+
+  // Taking the thread off the registered list deletes the registeredThread
+  // object.
+  CorePS::RemoveRegisteredThread(lock, registeredThread);
+}
+
+// Builds a PageInformation from the given IDs and URL and appends it to the
+// pages registered in CorePS. When the profiler is active, expired pages are
+// discarded afterwards.
+void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
+ const std::string& aUrl,
+ uint64_t aEmbedderInnerWindowID) {
+ DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
+ aTabID, aInnerWindowID, aUrl.c_str(), aEmbedderInnerWindowID);
+
+ MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+ PSAutoLock lock;
+
+ // When a Browsing context is first loaded, the first url loaded in it will be
+ // about:blank. Because of that, this call keeps the first non-about:blank
+ // registration of window and discards the previous one.
+ RefPtr<PageInformation> pageInfo =
+ new PageInformation(aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID);
+ CorePS::AppendRegisteredPage(lock, std::move(pageInfo));
+
+ // After appending the given page to CorePS, look for the expired
+ // pages and remove them if there are any.
+ if (ActivePS::Exists(lock)) {
+ ActivePS::DiscardExpiredPages(lock);
+ }
+}
+
+// Unregisters the page identified by aRegisteredInnerWindowID. The request is
+// routed to ActivePS when the profiler is active, and to CorePS otherwise.
+void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
+ if (!CorePS::Exists()) {
+ // This function can be called after the main thread has already shut down.
+ return;
+ }
+
+ PSAutoLock lock;
+
+ // During unregistration, if the profiler is active, we have to keep the
+ // page information since there may be some markers associated with the given
+ // page. But if the profiler is not active, we have no reason to keep the
+ // page information here because there can't be any marker associated with it.
+ if (ActivePS::Exists(lock)) {
+ ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
+ } else {
+ CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
+ }
+}
+
+// Clears every registered page in CorePS and, when the profiler is active,
+// the unregistered pages kept by ActivePS as well.
+void profiler_clear_all_pages() {
+  if (!CorePS::Exists()) {
+    // This function can be called after the main thread has already shut down.
+    return;
+  }
+
+  PSAutoLock lock;
+  CorePS::ClearRegisteredPages(lock);
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+  ActivePS::ClearUnregisteredPages(lock);
+}
+
+// Calls SetSleeping() on the RacyRegisteredThread found in TLS; does nothing
+// when there is none.
+void profiler_thread_sleep() {
+  // Callable from any thread, main or not.
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (RacyRegisteredThread* racy =
+          TLSRegisteredThread::RacyRegisteredThread()) {
+    racy->SetSleeping();
+  }
+}
+
+// Calls SetAwake() on the RacyRegisteredThread found in TLS; does nothing
+// when there is none.
+void profiler_thread_wake() {
+  // Callable from any thread, main or not.
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (RacyRegisteredThread* racy =
+          TLSRegisteredThread::RacyRegisteredThread()) {
+    racy->SetAwake();
+  }
+}
+
+// True when TLS holds a RacyRegisteredThread and it reports being profiled.
+bool detail::IsThreadBeingProfiled() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const RacyRegisteredThread* racy =
+      TLSRegisteredThread::RacyRegisteredThread();
+  if (!racy) {
+    return false;
+  }
+  return racy->IsBeingProfiled();
+}
+
+// Main-thread-only query: returns IsSleeping() of the RacyRegisteredThread
+// found in TLS, or false when there is none.
+bool profiler_thread_is_sleeping() {
+  MOZ_RELEASE_ASSERT(profiler_is_main_thread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  RacyRegisteredThread* racy = TLSRegisteredThread::RacyRegisteredThread();
+  return racy ? racy->IsSleeping() : false;
+}
+
+// Milliseconds elapsed between CorePS::ProcessStartTime() and now.
+double profiler_time() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const TimeStamp now = TimeStamp::Now();
+  return (now - CorePS::ProcessStartTime()).ToMilliseconds();
+}
+
+// Takes a synchronous sample of the thread registered in TLS and writes it
+// into aChunkedBuffer. Returns false (without sampling) when the profiler is
+// not active, when aCaptureOptions is NoStack, or when TLS holds no
+// RegisteredThread; returns true once DoSyncSample() has run.
+bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer,
+ StackCaptureOptions aCaptureOptions) {
+ MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+ PSAutoLock lock;
+
+ if (!ActivePS::Exists(lock) ||
+ aCaptureOptions == StackCaptureOptions::NoStack) {
+ return false;
+ }
+
+ RegisteredThread* registeredThread =
+ TLSRegisteredThread::RegisteredThread(lock);
+ if (!registeredThread) {
+ // Flag the missing registration in assertion-enabled builds, but fail
+ // gracefully otherwise.
+ MOZ_ASSERT(registeredThread);
+ return false;
+ }
+
+ // Wrap the caller's chunked buffer so the sample is written straight into it.
+ ProfileBuffer profileBuffer(aChunkedBuffer);
+
+ // Capture the current registers when native unwinding is available;
+ // otherwise sample with cleared registers.
+ Registers regs;
+#if defined(HAVE_NATIVE_UNWIND)
+ REGISTERS_SYNC_POPULATE(regs);
+#else
+ regs.Clear();
+#endif
+
+ DoSyncSample(lock, *registeredThread, TimeStamp::Now(), regs, profileBuffer,
+ aCaptureOptions);
+
+ return true;
+}
+
+// Captures a full backtrace into a freshly allocated single-chunk buffer.
+// Returns nullptr when the racy check reports the profiler as inactive or the
+// NoMarkerStacks feature as set, or when profiler_capture_backtrace_into()
+// fails.
+UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+ MOZ_RELEASE_ASSERT(CorePS::Exists());
+ AUTO_BASE_PROFILER_LABEL("baseprofiler::profiler_capture_backtrace",
+ PROFILER);
+
+ // Quick is-active check before allocating a buffer.
+ // If NoMarkerStacks is set, we don't want to capture a backtrace.
+ if (!profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) {
+ return nullptr;
+ }
+
+ // The buffer is created WithoutMutex and backed by one chunk of
+ // scExpectedMaximumStackSize bytes.
+ auto buffer = MakeUnique<ProfileChunkedBuffer>(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+ MakeUnique<ProfileBufferChunkManagerSingle>(
+ ProfileBufferChunkManager::scExpectedMaximumStackSize));
+
+ if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) {
+ return nullptr;
+ }
+
+ return buffer;
+}
+
+// Wraps the buffer from profiler_capture_backtrace() in a ProfilerBacktrace
+// named "SyncProfile"; returns nullptr when no buffer was captured.
+UniqueProfilerBacktrace profiler_get_backtrace() {
+  if (UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace()) {
+    return UniqueProfilerBacktrace(
+        new ProfilerBacktrace("SyncProfile", std::move(buffer)));
+  }
+  return nullptr;
+}
+
+// Deleter call operator: destroys aBacktrace with `delete`. Defined here,
+// out of line, rather than inline in the deleter's declaration.
+void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
+ delete aBacktrace;
+}
+
+// Lets callers avoid `profiler_...` functions while the profiler may already
+// hold a lock on this thread; a second, recursive lock would crash or wait
+// forever. Returns true if either of these is locked on the current thread:
+// - the main profiler mutex, used by most functions;
+// - the core buffer mutex, taken directly by some functions (e.g. the
+//   marker-related ones) without the main mutex.
+bool profiler_is_locked_on_current_thread() {
+  if (PSAutoLock::IsLockedOnCurrentThread()) {
+    return true;
+  }
+  return profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread();
+}
+
+// This is a simplified version of profiler_add_marker that can be easily passed
+// into the JS engine.
+// Both arguments are wrapped as null-terminated strings, and the text marker
+// is recorded in the JS category with default marker options.
+void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) {
+ BASE_PROFILER_MARKER_TEXT(
+ ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {},
+ ProfilerString8View::WrapNullTerminatedString(aMarkerText));
+}
+
+// Samples the stack of the registered thread whose id is aThreadId and feeds
+// the frames to aCollector. An unspecified aThreadId means the current
+// thread. The current thread is sampled in place; any other thread is
+// suspended, sampled and resumed. Nothing happens if no registered thread
+// matches.
+//
+// NOTE: aCollector's methods will be called while the target thread is paused.
+// Doing things in those methods like allocating -- which may try to claim
+// locks -- is a surefire way to deadlock.
+void profiler_suspend_and_sample_thread(BaseProfilerThreadId aThreadId,
+ uint32_t aFeatures,
+ ProfilerStackCollector& aCollector,
+ bool aSampleNative /* = true */) {
+ // Decide whether we are sampling ourselves. As a side effect, an unspecified
+ // aThreadId (a by-value parameter) is replaced with the current thread's id.
+ const bool isSynchronous = [&aThreadId]() {
+ const BaseProfilerThreadId currentThreadId = profiler_current_thread_id();
+ if (!aThreadId.IsSpecified()) {
+ aThreadId = currentThreadId;
+ return true;
+ }
+ return aThreadId == currentThreadId;
+ }();
+
+ // Lock the profiler mutex
+ PSAutoLock lock;
+
+ const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
+ CorePS::RegisteredThreads(lock);
+ for (auto& thread : registeredThreads) {
+ RefPtr<ThreadInfo> info = thread->Info();
+ RegisteredThread& registeredThread = *thread.get();
+
+ if (info->ThreadId() == aThreadId) {
+ if (info->IsMainThread()) {
+ aCollector.SetIsMainThread();
+ }
+
+ // Allocate the space for the native stack
+ NativeStack nativeStack;
+
+ // Shared by the synchronous and the suspended path below.
+ auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
+ // The target thread is now suspended. Collect a native
+ // backtrace, and call the callback.
+#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
+ if (aSampleNative) {
+ // We can only use FramePointerStackWalk or MozStackWalk from
+ // suspend_and_sample_thread as other stackwalking methods may not be
+ // initialized.
+# if defined(USE_FRAME_POINTER_STACK_WALK)
+ DoFramePointerBacktrace(lock, registeredThread, aRegs, nativeStack);
+# elif defined(USE_MOZ_STACK_WALK)
+ DoMozStackWalkBacktrace(lock, registeredThread, aRegs, nativeStack);
+# else
+# error "Invalid configuration"
+# endif
+
+ MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
+ nativeStack, aCollector);
+ } else
+#endif
+ {
+ // No native walk was done: nativeStack is passed as constructed, and
+ // only the leaf PC is reported as a native address.
+ MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
+ nativeStack, aCollector);
+
+ aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
+ }
+ };
+
+ if (isSynchronous) {
+ // Sampling the current thread, do NOT suspend it!
+ Registers regs;
+#if defined(HAVE_NATIVE_UNWIND)
+ REGISTERS_SYNC_POPULATE(regs);
+#else
+ regs.Clear();
+#endif
+ collectStack(regs, TimeStamp::Now());
+ } else {
+ // Suspend, sample, and then resume the target thread.
+ Sampler sampler(lock);
+ TimeStamp now = TimeStamp::Now();
+ sampler.SuspendAndSampleAndResumeThread(lock, registeredThread, now,
+ collectStack);
+
+ // NOTE: Make sure to disable the sampler before it is destroyed, in
+ // case the profiler is running at the same time.
+ sampler.Disable(lock);
+ }
+ // Only the first registered thread matching aThreadId is sampled.
+ break;
+ }
+ }
+}
+
+// END externally visible functions
+////////////////////////////////////////////////////////////////////////
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform.h b/mozglue/baseprofiler/core/platform.h
new file mode 100644
index 0000000000..f77fc12726
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform.h
@@ -0,0 +1,149 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this
+// software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#ifndef TOOLS_PLATFORM_H_
+#define TOOLS_PLATFORM_H_
+
+#include "PlatformMacros.h"
+
+#include "BaseProfiler.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Logging.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+
+#include <functional>
+#include <stdint.h>
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+// Predicate behind the LOG_TEST / DEBUG_LOG_TEST / VERBOSE_LOG_TEST macros
+// below, which call it with levels 3, 4 and 5 respectively.
+bool LogTest(int aLevelToTest);
+// printf-style output used by the LOG macros below; aFmt is the format string
+// (argument 1), checked against the variadic arguments by MOZ_FORMAT_PRINTF.
+void PrintToConsole(const char* aFmt, ...) MOZ_FORMAT_PRINTF(1, 2);
+} // namespace baseprofiler
+} // namespace mozilla
+
+// These are for MOZ_BASE_PROFILER_LOGGING and above. It's the default logging
+// level for the profiler, and should be used sparingly.
+// LOG(fmt, ...) prints "[I <pid>/<tid>] " + fmt + "\n" through PrintToConsole
+// when LogTest(3) passes. `arg` must be a string literal, because it is pasted
+// between the prefix and the newline by literal concatenation. The arguments
+// are only evaluated when the test passes.
+#define LOG_TEST ::mozilla::baseprofiler::LogTest(3)
+#define LOG(arg, ...) \
+ do { \
+ if (LOG_TEST) { \
+ ::mozilla::baseprofiler::PrintToConsole( \
+ "[I %d/%d] " arg "\n", \
+ int(::mozilla::baseprofiler::profiler_current_process_id() \
+ .ToNumber()), \
+ int(::mozilla::baseprofiler::profiler_current_thread_id() \
+ .ToNumber()), \
+ ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+// These are for MOZ_BASE_PROFILER_DEBUG_LOGGING. It should be used for logging
+// that is somewhat more verbose than LOG.
+// DEBUG_LOG(fmt, ...) prints "[D <pid>/<tid>] " + fmt + "\n" through
+// PrintToConsole when LogTest(4) passes. `arg` must be a string literal, and
+// the arguments are only evaluated when the test passes.
+#define DEBUG_LOG_TEST ::mozilla::baseprofiler::LogTest(4)
+#define DEBUG_LOG(arg, ...) \
+ do { \
+ if (DEBUG_LOG_TEST) { \
+ ::mozilla::baseprofiler::PrintToConsole( \
+ "[D %d/%d] " arg "\n", \
+ int(::mozilla::baseprofiler::profiler_current_process_id() \
+ .ToNumber()), \
+ int(::mozilla::baseprofiler::profiler_current_thread_id() \
+ .ToNumber()), \
+ ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+// These are for MOZ_BASE_PROFILER_VERBOSE_LOGGING. It should be used for
+// logging that is somewhat more verbose than DEBUG_LOG.
+// VERBOSE_LOG(fmt, ...) prints "[V <pid>/<tid>] " + fmt + "\n" through
+// PrintToConsole when LogTest(5) passes. `arg` must be a string literal, and
+// the arguments are only evaluated when the test passes.
+#define VERBOSE_LOG_TEST ::mozilla::baseprofiler::LogTest(5)
+#define VERBOSE_LOG(arg, ...) \
+ do { \
+ if (VERBOSE_LOG_TEST) { \
+ ::mozilla::baseprofiler::PrintToConsole( \
+ "[V %d/%d] " arg "\n", \
+ int(::mozilla::baseprofiler::profiler_current_process_id() \
+ .ToNumber()), \
+ int(::mozilla::baseprofiler::profiler_current_thread_id() \
+ .ToNumber()), \
+ ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+namespace mozilla {
+
+class JSONWriter;
+
+namespace baseprofiler {
+
+// If positive, skip stack-sampling in the sampler thread loop.
+// Users should increment it atomically when samplings should be avoided, and
+// later decrement it back. Multiple uses can overlap.
+// There could be a sampling in progress when this is first incremented, so if
+// it is critical to prevent any sampling, lock the profiler mutex instead.
+// Relaxed ordering, because it's used to request that the profiler pause
+// future sampling; this is not time critical, nor dependent on anything else.
+extern mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed> gSkipSampling;
+
+typedef uint8_t* Address;
+
+// Opaque here: only forward-declared, so this header cannot new/delete it.
+class PlatformData;
+
+// We can't new/delete the type safely without defining it
+// (-Wdelete-incomplete). Use these to hide the details from clients.
+struct PlatformDataDestructor {
+ // Only declared here; NOTE(review): presumably defined where PlatformData
+ // is a complete type -- confirm against the platform-specific sources.
+ void operator()(PlatformData*);
+};
+
+// Owning handle that releases its PlatformData through PlatformDataDestructor.
+typedef UniquePtr<PlatformData, PlatformDataDestructor> UniquePlatformData;
+UniquePlatformData AllocPlatformData(BaseProfilerThreadId aThreadId);
+
+// Convert the array of strings to a bitfield.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+ uint32_t aFeatureCount,
+ bool aIsStartup = false);
+
+// Flags to conveniently track various JS instrumentations.
+// Each enumerator occupies its own bit (0x1, 0x2).
+enum class JSInstrumentationFlags {
+ StackSampling = 0x1,
+ Allocations = 0x2,
+};
+
+// Record an exit profile from a child process.
+void profiler_received_exit_profile(const std::string& aExitProfile);
+
+// Extract all received exit profiles that have not yet expired (i.e., they
+// still intersect with this process' buffer range).
+Vector<std::string> profiler_move_exit_profiles();
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif /* ndef TOOLS_PLATFORM_H_ */
diff --git a/mozglue/baseprofiler/core/shared-libraries-linux.cc b/mozglue/baseprofiler/core/shared-libraries-linux.cc
new file mode 100644
index 0000000000..84680bb59b
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-linux.cc
@@ -0,0 +1,855 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilerSharedLibraries.h"
+
+#define PATH_MAX_TOSTRING(x) #x
+#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x)
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fstream>
+#include "platform.h"
+#include "mozilla/Sprintf.h"
+
+#include <algorithm>
+#include <arpa/inet.h>
+#include <elf.h>
+#include <fcntl.h>
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+# include <features.h>
+#endif
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <vector>
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+# include <link.h> // dl_phdr_info, ElfW()
+#else
+# error "Unexpected configuration"
+#endif
+
+#if defined(GP_OS_android)
+extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr(
+ int (*callback)(struct dl_phdr_info* info, size_t size, void* data),
+ void* data);
+#endif
+
+#if defined(GP_OS_freebsd) && !defined(ElfW)
+# define ElfW(type) Elf_##type
+#endif
+
+// ----------------------------------------------------------------------------
+// Starting imports from toolkit/crashreporter/google-breakpad/, as needed by
+// this file when moved to mozglue.
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/memory_range.h.
+// A lightweight wrapper with a pointer and a length to encapsulate a contiguous
+// range of memory. It provides helper methods for checked access of a subrange
+// of the memory. Its implementation does not allocate memory or call into
+// libc functions, and is thus safer to use in a crashed environment.
+class MemoryRange {
+ public:
+  // Constructs an empty range.
+  MemoryRange() : data_(nullptr), length_(0) {}
+
+  // Constructs a range over |length| bytes at |data| (empty if |data| is
+  // null, see Set()).
+  MemoryRange(const void* data, size_t length) { Set(data, length); }
+
+  // Returns true if this memory range contains no data.
+  bool IsEmpty() const {
+    // Set() guarantees that |length_| is zero if |data_| is NULL.
+    return length_ == 0;
+  }
+
+  // Resets to an empty range.
+  void Reset() {
+    data_ = nullptr;
+    length_ = 0;
+  }
+
+  // Sets this memory range to point to |data| and its length to |length|.
+  void Set(const void* data, size_t length) {
+    data_ = reinterpret_cast<const uint8_t*>(data);
+    // Always set |length_| to zero if |data_| is NULL.
+    length_ = data ? length : 0;
+  }
+
+  // Returns true if this range covers a subrange of |sub_length| bytes
+  // at |sub_offset| bytes of this memory range, or false otherwise.
+  bool Covers(size_t sub_offset, size_t sub_length) const {
+    // The following checks verify that:
+    // 1. sub_offset is within [ 0 .. length_ - 1 ]
+    // 2. sub_offset + sub_length is within
+    //    [ sub_offset .. length_ ]
+    // The middle comparison rejects a sum that wrapped around size_t.
+    return sub_offset < length_ && sub_offset + sub_length >= sub_offset &&
+           sub_offset + sub_length <= length_;
+  }
+
+  // Returns a raw data pointer to a subrange of |sub_length| bytes at
+  // |sub_offset| bytes of this memory range, or NULL if the subrange
+  // is out of bounds.
+  const void* GetData(size_t sub_offset, size_t sub_length) const {
+    return Covers(sub_offset, sub_length) ? (data_ + sub_offset) : nullptr;
+  }
+
+  // Same as the two-argument version of GetData() but uses sizeof(DataType)
+  // as the subrange length and returns an |DataType| pointer for convenience.
+  template <typename DataType>
+  const DataType* GetData(size_t sub_offset) const {
+    return reinterpret_cast<const DataType*>(
+        GetData(sub_offset, sizeof(DataType)));
+  }
+
+  // Returns a raw pointer to the |element_index|-th element of an array
+  // of elements of length |element_size| starting at |element_offset| bytes
+  // of this memory range, or NULL if the element is out of bounds.
+  const void* GetArrayElement(size_t element_offset, size_t element_size,
+                              unsigned element_index) const {
+    // Refuse element positions whose byte offset does not fit in size_t.
+    // Without this, the offset computation below could wrap around and land
+    // back inside the range, handing out a pointer to the wrong bytes.
+    const size_t max_size = ~static_cast<size_t>(0);
+    if (element_size != 0 &&
+        element_index > (max_size - element_offset) / element_size) {
+      return nullptr;
+    }
+    size_t sub_offset = element_offset + element_index * element_size;
+    return GetData(sub_offset, element_size);
+  }
+
+  // Same as the three-argument version of GetArrayElement() but deduces
+  // the element size using sizeof(ElementType) and returns an |ElementType|
+  // pointer for convenience.
+  template <typename ElementType>
+  const ElementType* GetArrayElement(size_t element_offset,
+                                     unsigned element_index) const {
+    return reinterpret_cast<const ElementType*>(
+        GetArrayElement(element_offset, sizeof(ElementType), element_index));
+  }
+
+  // Returns a subrange of |sub_length| bytes at |sub_offset| bytes of
+  // this memory range, or an empty range if the subrange is out of bounds.
+  MemoryRange Subrange(size_t sub_offset, size_t sub_length) const {
+    return Covers(sub_offset, sub_length)
+               ? MemoryRange(data_ + sub_offset, sub_length)
+               : MemoryRange();
+  }
+
+  // Returns a pointer to the beginning of this memory range.
+  const uint8_t* data() const { return data_; }
+
+  // Returns the length, in bytes, of this memory range.
+  size_t length() const { return length_; }
+
+ private:
+  // Pointer to the beginning of this memory range.
+  const uint8_t* data_;
+
+  // Length, in bytes, of this memory range.
+  size_t length_;
+};
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.h
+// and inlined .cc.
+// A utility class for mapping a file into memory for read-only access of the
+// file content. Its implementation avoids calling into libc functions by
+// directly making system calls for open, close, mmap, and munmap.
+class MemoryMappedFile {
+ public:
+  MemoryMappedFile() {}
+
+  // Constructor that calls Map() to map a file at |path| into memory.
+  // If Map() fails, the object behaves as if it is default constructed.
+  MemoryMappedFile(const char* path, size_t offset) { Map(path, offset); }
+
+  MemoryMappedFile(const MemoryMappedFile&) = delete;
+  MemoryMappedFile& operator=(const MemoryMappedFile&) = delete;
+
+  // Releases the mapping, if any, so that a mapped file does not stay in the
+  // address space after the object is gone.
+  ~MemoryMappedFile() { Unmap(); }
+
+  // Maps a file at |path| into memory, starting |offset| bytes into the file,
+  // which can then be accessed via content() as a MemoryRange object or via
+  // data(), and returns true on success. Mapping an empty file (or one that
+  // does not extend beyond |offset|) will succeed but with data() and size()
+  // returning NULL and 0, respectively. An existing mapping is unmapped
+  // before a new mapping is created.
+  bool Map(const char* path, size_t offset) {
+    Unmap();
+
+    int fd = open(path, O_RDONLY, 0);
+    if (fd == -1) {
+      return false;
+    }
+
+#if defined(__x86_64__) || defined(__aarch64__) || \
+    (defined(__mips__) && _MIPS_SIM == _ABI64) ||  \
+    !(defined(GP_OS_linux) || defined(GP_OS_android))
+
+    struct stat st;
+    if (fstat(fd, &st) == -1 || st.st_size < 0) {
+#else
+    struct stat64 st;
+    if (fstat64(fd, &st) == -1 || st.st_size < 0) {
+#endif
+      close(fd);
+      return false;
+    }
+
+    // Strangely file size can be negative, but we check above that it is not.
+    size_t file_len = static_cast<size_t>(st.st_size);
+    // If the file does not extend beyond the offset, simply use an empty
+    // MemoryRange and return true. Don't bother to call mmap()
+    // even though mmap() can handle an empty file on some platforms.
+    if (offset >= file_len) {
+      close(fd);
+      return true;
+    }
+
+    // Map exactly the bytes that exist past |offset|. The mapped length must
+    // equal the length recorded in |content_|, because Unmap() passes that
+    // same length to munmap().
+    const size_t map_len = file_len - offset;
+    void* data = mmap(nullptr, map_len, PROT_READ, MAP_PRIVATE, fd, offset);
+    // The mapping stays valid after the descriptor is closed.
+    close(fd);
+    if (data == MAP_FAILED) {
+      return false;
+    }
+
+    content_.Set(data, map_len);
+    return true;
+  }
+
+  // Unmaps the memory for the mapped file. It's a no-op if no file is
+  // mapped.
+  void Unmap() {
+    if (content_.data()) {
+      munmap(const_cast<uint8_t*>(content_.data()), content_.length());
+      content_.Set(nullptr, 0);
+    }
+  }
+
+  // Returns a MemoryRange object that covers the memory for the mapped
+  // file. The MemoryRange object is empty if no file is mapped.
+  const MemoryRange& content() const { return content_; }
+
+  // Returns a pointer to the beginning of the memory for the mapped file,
+  // or NULL if no file is mapped or the mapped file is empty.
+  const void* data() const { return content_.data(); }
+
+  // Returns the size in bytes of the mapped file, or zero if no file
+  // is mapped.
+  size_t size() const { return content_.length(); }
+
+ private:
+  // Mapped file content as a MemoryRange object.
+  MemoryRange content_;
+};
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/linux/file_id.h and inlined
+// .cc.
+// GNU binutils' ld defaults to 'sha1', which is 160 bits == 20 bytes,
+// so this is enough to fit that, which most binaries will use.
+// This is just a sensible default for vectors so most callers can get away with
+// stack allocation.
+static const size_t kDefaultBuildIdSize = 20;
+
+// Used in a few places for backwards-compatibility.
+// Field widths are 4 + 2 + 2 + 8 bytes.
+typedef struct {
+ uint32_t data1;
+ uint16_t data2;
+ uint16_t data3;
+ uint8_t data4[8];
+} MDGUID; /* GUID */
+
+// Size of MDGUID in bytes, as reported by sizeof.
+const size_t kMDGUIDSize = sizeof(MDGUID);
+
+class FileID {
+ public:
+ // Stores a copy of |path|; the file is not opened here.
+ explicit FileID(const char* path) : path_(path) {}
+ ~FileID() {}
+
+ // Load the identifier for the elf file path specified in the constructor into
+ // |identifier|.
+ //
+ // The current implementation will look for a .note.gnu.build-id
+ // section and use that as the file id, otherwise it falls back to
+ // XORing the first 4096 bytes of the .text section to generate an identifier.
+  bool ElfFileIdentifier(std::vector<uint8_t>& identifier) {
+    MemoryMappedFile mapped_file(path_.c_str(), 0);
+    // The header fields are read straight out of the mapping, so the file
+    // must hold at least one whole ELF header. Elf32_Ehdr is the smaller of
+    // the two header layouts, so it is the minimum a valid file of either
+    // class can have.
+    if (!mapped_file.data() || mapped_file.size() < sizeof(Elf32_Ehdr)) {
+      return false;
+    }
+
+    return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
+  }
+
+ // Traits classes so consumers can write templatized code to deal
+ // with specific ELF bits.
+ // Type aliases and constants for the 32-bit ELF class (ELFCLASS32).
+ struct ElfClass32 {
+ typedef Elf32_Addr Addr;
+ typedef Elf32_Ehdr Ehdr;
+ typedef Elf32_Nhdr Nhdr;
+ typedef Elf32_Phdr Phdr;
+ typedef Elf32_Shdr Shdr;
+ typedef Elf32_Half Half;
+ typedef Elf32_Off Off;
+ typedef Elf32_Sym Sym;
+ typedef Elf32_Word Word;
+
+ // Value compared against e_ident[EI_CLASS].
+ static const int kClass = ELFCLASS32;
+ static const uint16_t kMachine = EM_386;
+ static const size_t kAddrSize = sizeof(Elf32_Addr);
+ static constexpr const char* kMachineName = "x86";
+ };
+
+ // Type aliases and constants for the 64-bit ELF class (ELFCLASS64).
+ struct ElfClass64 {
+ typedef Elf64_Addr Addr;
+ typedef Elf64_Ehdr Ehdr;
+ typedef Elf64_Nhdr Nhdr;
+ typedef Elf64_Phdr Phdr;
+ typedef Elf64_Shdr Shdr;
+ typedef Elf64_Half Half;
+ typedef Elf64_Off Off;
+ typedef Elf64_Sym Sym;
+ typedef Elf64_Word Word;
+
+ // Value compared against e_ident[EI_CLASS].
+ static const int kClass = ELFCLASS64;
+ static const uint16_t kMachine = EM_X86_64;
+ static const size_t kAddrSize = sizeof(Elf64_Addr);
+ static constexpr const char* kMachineName = "x86_64";
+ };
+
+ // Internal helper method, exposed for convenience for callers
+ // that already have more info.
+  // Scans the |nsection| headers at |sections| for the first one whose type
+  // is |section_type| and whose name equals |name|. Names are looked up in
+  // the string table spanning [section_names, names_end). Returns the
+  // matching header, or NULL when nothing matches or the arguments are
+  // unusable (null |name| or |sections|, empty |name|, zero |nsection|).
+  template <typename ElfClass>
+  static const typename ElfClass::Shdr* FindElfSectionByName(
+      const char* name, typename ElfClass::Word section_type,
+      const typename ElfClass::Shdr* sections, const char* section_names,
+      const char* names_end, int nsection) {
+    if (!name || !sections || nsection == 0) {
+      return NULL;
+    }
+
+    int name_len = strlen(name);
+    if (name_len == 0) {
+      return NULL;
+    }
+
+    for (int idx = 0; idx < nsection; ++idx) {
+      const typename ElfClass::Shdr& candidate = sections[idx];
+      if (candidate.sh_type != section_type) {
+        continue;
+      }
+      const char* candidate_name = section_names + candidate.sh_name;
+      // The table must have room for |name| plus its terminator at this
+      // position before the strings are compared.
+      if (names_end - candidate_name < name_len + 1) {
+        continue;
+      }
+      if (strcmp(name, candidate_name) == 0) {
+        return &candidate;
+      }
+    }
+    return NULL;
+  }
+
+ // One segment located by FindElfClassSegment().
+ struct ElfSegment {
+ // Set to elf_base + p_offset of the program header.
+ const void* start;
+ // Set to the program header's p_filesz.
+ size_t size;
+ };
+
+ // Convert an offset from an Elf header into a pointer to the mapped
+ // address in the current process. Takes an extra template parameter
+ // to specify the return type to avoid having to dynamic_cast the
+ // result.
+ // The result is simply |elf_header| + |offset| bytes, reinterpreted as T;
+ // no bounds check is performed here.
+ template <typename ElfClass, typename T>
+ static const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+ typename ElfClass::Off offset) {
+ return reinterpret_cast<const T*>(reinterpret_cast<uintptr_t>(elf_header) +
+ offset);
+ }
+
+// ELF note name and desc are 32-bits word padded: rounds |a| up to the next
+// multiple of 4. The argument is parenthesized so that expressions built from
+// lower-precedence operators (e.g. `x | y`) are rounded as a whole.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+  // Walks the notes stored in the |length| bytes at |section| and, on the
+  // first note of type NT_GNU_BUILD_ID, appends its descriptor bytes to
+  // |identifier| and returns true. Returns false if there is no such note,
+  // if its descriptor is empty, or if any note on the way does not fit inside
+  // the section (truncated or corrupt data).
+  static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
+                                            std::vector<uint8_t>& identifier) {
+    static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
+                  "Elf32_Nhdr and Elf64_Nhdr should be the same");
+    typedef typename ElfClass32::Nhdr Nhdr;
+
+    const uint8_t* cursor = reinterpret_cast<const uint8_t*>(section);
+    size_t remaining = length;
+
+    // Only look at a note header when all of it lies inside the section.
+    while (remaining >= sizeof(Nhdr)) {
+      const Nhdr* note_header = reinterpret_cast<const Nhdr*>(cursor);
+      const size_t payload_room = remaining - sizeof(Nhdr);
+      const size_t name_size = note_header->n_namesz;
+      const size_t desc_size = note_header->n_descsz;
+
+      // The (padded) name and the descriptor must both fit in what is left of
+      // the section. Checking the raw name size first keeps the padding
+      // arithmetic from overflowing.
+      if (name_size > payload_room) {
+        return false;
+      }
+      const size_t padded_name = NOTE_PADDING(name_size);
+      if (padded_name > payload_room ||
+          desc_size > payload_room - padded_name) {
+        return false;
+      }
+
+      if (note_header->n_type == NT_GNU_BUILD_ID) {
+        if (desc_size == 0) {
+          return false;
+        }
+        const uint8_t* build_id = cursor + sizeof(Nhdr) + padded_name;
+        identifier.insert(identifier.end(), build_id, build_id + desc_size);
+        return true;
+      }
+
+      // Skip to the next note; the final note's descriptor padding is allowed
+      // to run up to (or past) the end of the section, which ends the walk.
+      const size_t padded_desc = NOTE_PADDING(desc_size);
+      const size_t note_size = sizeof(Nhdr) + padded_name + padded_desc;
+      if (note_size >= remaining) {
+        break;
+      }
+      cursor += note_size;
+      remaining -= note_size;
+    }
+
+    return false;
+  }
+
+ // Looks up the section called |section_name| of type |section_type| in the
+ // ELF image at |elf_base|, interpreting it with the layouts of |ElfClass|.
+ // Returns false for null arguments, a missing ELF magic, or an image whose
+ // class differs from ElfClass::kClass. Otherwise returns true -- even when
+ // no matching non-empty section exists, in which case |*section_start| and
+ // |*section_size| are left untouched, so callers must inspect them.
+ // NOTE(review): the size of the mapping is not passed in, so e_shoff,
+ // e_shstrndx and sh_offset are used without bounds checks here.
+ template <typename ElfClass>
+ static bool FindElfClassSection(const char* elf_base,
+ const char* section_name,
+ typename ElfClass::Word section_type,
+ const void** section_start,
+ size_t* section_size) {
+ typedef typename ElfClass::Ehdr Ehdr;
+ typedef typename ElfClass::Shdr Shdr;
+
+ if (!elf_base || !section_start || !section_size) {
+ return false;
+ }
+
+ if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) {
+ return false;
+ }
+
+ const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+ if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) {
+ return false;
+ }
+
+ // Locate the section header table, then the section-name string table that
+ // the e_shstrndx-th header describes.
+ const Shdr* sections =
+ GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+ const Shdr* section_names = sections + elf_header->e_shstrndx;
+ const char* names =
+ GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
+ const char* names_end = names + section_names->sh_size;
+
+ const Shdr* section =
+ FindElfSectionByName<ElfClass>(section_name, section_type, sections,
+ names, names_end, elf_header->e_shnum);
+
+ // Outputs are only written for a section that was found and is non-empty.
+ if (section != NULL && section->sh_size > 0) {
+ *section_start = elf_base + section->sh_offset;
+ *section_size = section->sh_size;
+ }
+
+ return true;
+ }
+
+  // Appends to |segments| one ElfSegment (file position and file size) for
+  // every program header of type |segment_type| in the ELF image at
+  // |elf_base|. Returns false for null arguments, a missing ELF magic, or an
+  // image whose class differs from ElfClass::kClass; true otherwise, whether
+  // or not any segment matched.
+  template <typename ElfClass>
+  static bool FindElfClassSegment(const char* elf_base,
+                                  typename ElfClass::Word segment_type,
+                                  std::vector<ElfSegment>* segments) {
+    typedef typename ElfClass::Ehdr Ehdr;
+    typedef typename ElfClass::Phdr Phdr;
+
+    if (!elf_base || !segments) {
+      return false;
+    }
+
+    if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) {
+      return false;
+    }
+
+    const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+    if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) {
+      return false;
+    }
+
+    const Phdr* const first =
+        GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+    const Phdr* const last = first + elf_header->e_phnum;
+
+    for (const Phdr* phdr = first; phdr != last; ++phdr) {
+      if (phdr->p_type != segment_type) {
+        continue;
+      }
+      segments->push_back(ElfSegment{elf_base + phdr->p_offset,
+                                     static_cast<size_t>(phdr->p_filesz)});
+    }
+
+    return true;
+  }
+
+  // True when the bytes at |elf_base| start with the ELF magic number.
+  static bool IsValidElf(const void* elf_base) {
+    const char* magic = static_cast<const char*>(elf_base);
+    return strncmp(magic, ELFMAG, SELFMAG) == 0;
+  }
+
+  // Return the EI_CLASS byte of the ELF header at |elf_base|. The caller is
+  // expected to have validated the image first; no check is made here.
+  static int ElfClass(const void* elf_base) {
+    return reinterpret_cast<const ElfW(Ehdr)*>(elf_base)->e_ident[EI_CLASS];
+  }
+
+  // Locate the section |section_name| of type |section_type| in the ELF image
+  // mapped at |elf_mapped_base|, dispatching on the image's ELF class.
+  //
+  // On success returns true with |*section_start| / |*section_size|
+  // describing the section. Returns false (leaving the outputs null / zero
+  // once they have been reset) when the image is not valid ELF, its class is
+  // neither 32- nor 64-bit, or no non-empty matching section exists. Returns
+  // false without touching the outputs when any pointer argument is null.
+  static bool FindElfSection(const void* elf_mapped_base,
+                             const char* section_name, uint32_t section_type,
+                             const void** section_start, size_t* section_size) {
+    if (!(elf_mapped_base && section_start && section_size)) {
+      return false;
+    }
+
+    *section_start = nullptr;
+    *section_size = 0;
+
+    if (!IsValidElf(elf_mapped_base)) {
+      return false;
+    }
+
+    const char* elf_base = static_cast<const char*>(elf_mapped_base);
+    bool parsed = false;
+    switch (ElfClass(elf_mapped_base)) {
+      case ELFCLASS32:
+        parsed = FindElfClassSection<ElfClass32>(
+            elf_base, section_name, section_type, section_start, section_size);
+        break;
+      case ELFCLASS64:
+        parsed = FindElfClassSection<ElfClass64>(
+            elf_base, section_name, section_type, section_start, section_size);
+        break;
+      default:
+        return false;
+    }
+
+    // The per-class helper reports success even when nothing matched, so the
+    // output pointer is what tells us whether a section was actually found.
+    return parsed && *section_start != nullptr;
+  }
+
+  // Collect into |segments| every program segment of type |segment_type| in
+  // the ELF image mapped at |elf_mapped_base|, dispatching on the image's ELF
+  // class. Returns false for null arguments, a missing ELF magic, or an ELF
+  // class other than 32- or 64-bit.
+  static bool FindElfSegments(const void* elf_mapped_base,
+                              uint32_t segment_type,
+                              std::vector<ElfSegment>* segments) {
+    if (!(elf_mapped_base && segments)) {
+      return false;
+    }
+    if (!IsValidElf(elf_mapped_base)) {
+      return false;
+    }
+
+    const char* elf_base = static_cast<const char*>(elf_mapped_base);
+    switch (ElfClass(elf_mapped_base)) {
+      case ELFCLASS32:
+        return FindElfClassSegment<ElfClass32>(elf_base, segment_type,
+                                               segments);
+      case ELFCLASS64:
+        return FindElfClassSegment<ElfClass64>(elf_base, segment_type,
+                                               segments);
+      default:
+        return false;
+    }
+  }
+
+  // Attempt to locate the GNU build-id note of the ELF binary mapped at
+  // |elf_mapped_base| and append its bytes to |identifier|.
+  //
+  // PT_NOTE segments are tried first, in order; if none of them yields a
+  // build id, the ".note.gnu.build-id" section is tried. Returns true as soon
+  // as one source provides an identifier, false if none does.
+  static bool FindElfBuildIDNote(const void* elf_mapped_base,
+                                 std::vector<uint8_t>& identifier) {
+    // lld normally creates 2 PT_NOTEs, gold normally creates 1.
+    std::vector<ElfSegment> segs;
+    if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
+      for (const ElfSegment& seg : segs) {
+        if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
+          return true;
+        }
+      }
+    }
+
+    // Declared const-correct so that its address can be passed straight to
+    // FindElfSection() without a cast.
+    const void* note_section = nullptr;
+    size_t note_size = 0;
+    if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                       &note_section, &note_size)) {
+      return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
+    }
+
+    return false;
+  }
+
+  // Attempt to locate the .text section of the ELF binary mapped at
+  // |elf_mapped_base| and generate a simple hash by XORing the first page
+  // worth of bytes into |identifier|.
+  //
+  // |identifier| is always resized to kMDGUIDSize bytes, even on failure.
+  // Returns false when no non-empty .text section can be found.
+  static bool HashElfTextSection(const void* elf_mapped_base,
+                                 std::vector<uint8_t>& identifier) {
+    identifier.resize(kMDGUIDSize);
+
+    // Declared const-correct so that its address can be passed straight to
+    // FindElfSection() without a cast.
+    const void* text_section = nullptr;
+    size_t text_size = 0;
+    if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, &text_section,
+                        &text_size) ||
+        text_size == 0) {
+      return false;
+    }
+
+    // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
+    // function backwards-compatible.
+    memset(&identifier[0], 0, kMDGUIDSize);
+    const uint8_t* ptr = static_cast<const uint8_t*>(text_section);
+    const uint8_t* ptr_end =
+        ptr + std::min(text_size, static_cast<size_t>(4096));
+    // NOTE(review): each pass consumes a full kMDGUIDSize-byte chunk, so when
+    // the hashed length is not a multiple of kMDGUIDSize the last pass reads
+    // up to kMDGUIDSize - 1 bytes beyond |ptr_end|. This is deliberately left
+    // as is: clamping the final chunk would change the identifiers produced.
+    while (ptr < ptr_end) {
+      for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i];
+      ptr += kMDGUIDSize;
+    }
+    return true;
+  }
+
+  // Produce the identifier for the ELF file mapped into memory at |base| and
+  // store it in |identifier|. A build-id note is preferred; when none can be
+  // found, a hash of the first page of the text section is used instead.
+  // Returns false if neither source yields an identifier.
+  static bool ElfFileIdentifierFromMappedFile(
+      const void* base, std::vector<uint8_t>& identifier) {
+    return FindElfBuildIDNote(base, identifier) ||
+           HashElfTextSection(base, identifier);
+  }
+
+  // These three functions are not ever called in an unsafe context, so it's OK
+  // to allocate memory.
+  //
+  // Render |count| bytes starting at |bytes| as two hexadecimal digits per
+  // byte, uppercase unless |lowercase| is true. Returns an empty string when
+  // |count| is zero.
+  static std::string bytes_to_hex_string(const uint8_t* bytes, size_t count,
+                                         bool lowercase = false) {
+    const char* const digits =
+        lowercase ? "0123456789abcdef" : "0123456789ABCDEF";
+
+    std::string result;
+    result.reserve(count * 2);
+    // The index has the same type as |count| so that it can never wrap before
+    // reaching it.
+    for (size_t idx = 0; idx < count; ++idx) {
+      const uint8_t byte = bytes[idx];
+      result += digits[byte >> 4];
+      result += digits[byte & 0x0F];
+    }
+    return result;
+  }
+
+  // Convert the |identifier| data to a string. The string will
+  // be formatted as a UUID in all uppercase without dashes.
+  // (e.g., 22F065BBFC9C49F780FE26A7CEBD7BCE).
+  //
+  // Only the first kMDGUIDSize bytes of |identifier| are used; a shorter (or
+  // empty) identifier is zero-padded.
+  static std::string ConvertIdentifierToUUIDString(
+      const std::vector<uint8_t>& identifier) {
+    uint8_t identifier_swapped[kMDGUIDSize] = {0};
+
+    // An empty vector has no element 0 to take the address of, so skip the
+    // copy entirely in that case.
+    if (!identifier.empty()) {
+      memcpy(identifier_swapped, identifier.data(),
+             std::min(kMDGUIDSize, identifier.size()));
+    }
+
+    // Endian-ness swap to match dump processor expectation. The three leading
+    // fields are moved through properly typed temporaries with memcpy rather
+    // than by casting the byte array to wider integer pointers.
+    uint32_t data1;
+    memcpy(&data1, identifier_swapped, sizeof(data1));
+    data1 = htonl(data1);
+    memcpy(identifier_swapped, &data1, sizeof(data1));
+
+    uint16_t data2;
+    memcpy(&data2, identifier_swapped + 4, sizeof(data2));
+    data2 = htons(data2);
+    memcpy(identifier_swapped + 4, &data2, sizeof(data2));
+
+    uint16_t data3;
+    memcpy(&data3, identifier_swapped + 6, sizeof(data3));
+    data3 = htons(data3);
+    memcpy(identifier_swapped + 6, &data3, sizeof(data3));
+
+    return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
+  }
+
+  // Convert the entire |identifier| data to a lowercase hex string. An empty
+  // identifier yields an empty string.
+  static std::string ConvertIdentifierToString(
+      const std::vector<uint8_t>& identifier) {
+    // data() is valid on an empty vector, unlike taking the address of
+    // element 0.
+    return bytes_to_hex_string(identifier.data(), identifier.size(),
+                               /* lowercase */ true);
+  }
+
+ private:
+ // Storage for the path specified
+ std::string path_;
+};
+
+// End of imports from toolkit/crashreporter/google-breakpad/.
+// ----------------------------------------------------------------------------
+
+// Summary of one loaded object: its name, its base address, and the lowest
+// start / highest end address of its mappings.
+struct LoadedLibraryInfo {
+  // |aName| may be null, in which case an empty name is stored (constructing
+  // a std::string from a null pointer is undefined behaviour).
+  LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress,
+                    unsigned long aFirstMappingStart,
+                    unsigned long aLastMappingEnd)
+      : mName(aName ? aName : ""),
+        mBaseAddress(aBaseAddress),
+        mFirstMappingStart(aFirstMappingStart),
+        mLastMappingEnd(aLastMappingEnd) {}
+
+  std::string mName;
+  unsigned long mBaseAddress;
+  unsigned long mFirstMappingStart;
+  unsigned long mLastMappingEnd;
+};
+
+// Format |aIdentifier| as a breakpad id: the UUID string followed by the
+// breakpad id age, which is the character '0' (not '\0').
+static std::string IDtoUUIDString(const std::vector<uint8_t>& aIdentifier) {
+  return FileID::ConvertIdentifierToUUIDString(aIdentifier) + '0';
+}
+
+// Return the raw build id |aIdentifier| as a hex string.
+static std::string IDtoString(const std::vector<uint8_t>& aIdentifier) {
+  return FileID::ConvertIdentifierToString(aIdentifier);
+}
+
+// Compute the breakpad id of the binary file at |bin_name|. Returns an empty
+// string when no identifier can be obtained for the file.
+static std::string getBreakpadId(const char* bin_name) {
+  std::vector<uint8_t> buildId;
+  buildId.reserve(kDefaultBuildIdSize);
+
+  FileID fileId(bin_name);
+  if (!fileId.ElfFileIdentifier(buildId)) {
+    return {};
+  }
+
+  return IDtoUUIDString(buildId);
+}
+
+// Compute the code id (raw build id in hex) of the binary file at |bin_name|.
+// Returns an empty string when no identifier can be obtained for the file.
+static std::string getCodeId(const char* bin_name) {
+  std::vector<uint8_t> buildId;
+  buildId.reserve(kDefaultBuildIdSize);
+
+  FileID fileId(bin_name);
+  if (!fileId.ElfFileIdentifier(buildId)) {
+    return {};
+  }
+
+  return IDtoString(buildId);
+}
+
+// Build the SharedLibrary entry for the object at |path| that is mapped at
+// [libStart, libEnd) with file offset |offset|. The breakpad id and code id
+// are read from the file at |path|; the same name and path are passed for
+// both the module and the debug name/path arguments, and the version and
+// architecture arguments are left empty.
+static SharedLibrary SharedLibraryAtPath(const char* path,
+                                         unsigned long libStart,
+                                         unsigned long libEnd,
+                                         unsigned long offset = 0) {
+  std::string pathStr = path;
+
+  // The name is the last path component. Paths handled here use '/' as the
+  // separator; splitting on '\\' would leave the whole path in the name.
+  size_t pos = pathStr.rfind('/');
+  std::string nameStr =
+      (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr;
+
+  return SharedLibrary(libStart, libEnd, offset, getBreakpadId(path),
+                       getCodeId(path), nameStr, pathStr, nameStr, pathStr,
+                       std::string{}, "");
+}
+
+// dl_iterate_phdr() callback. |data| points to a
+// std::vector<LoadedLibraryInfo>; one entry is appended per object that has
+// at least one program header, recording the object's name, its base address
+// and the lowest start / highest end address over its PT_LOAD segments.
+// Always returns 0.
+static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size,
+                               void* data) {
+  if (dl_info->dlpi_phnum <= 0) return 0;
+
+  // Start from an empty range: the largest possible start and an end of 0.
+  unsigned long lowestStart = -1;
+  unsigned long highestEnd = 0;
+
+  for (size_t i = 0; i < dl_info->dlpi_phnum; i++) {
+    const auto& phdr = dl_info->dlpi_phdr[i];
+    if (phdr.p_type == PT_LOAD) {
+      const unsigned long start = dl_info->dlpi_addr + phdr.p_vaddr;
+      const unsigned long end = start + phdr.p_memsz;
+      if (start < lowestStart) {
+        lowestStart = start;
+      }
+      if (end > highestEnd) {
+        highestEnd = end;
+      }
+    }
+  }
+
+  auto* libraries = reinterpret_cast<std::vector<LoadedLibraryInfo>*>(data);
+  const unsigned long baseAddress = dl_info->dlpi_addr;
+  libraries->push_back(LoadedLibraryInfo(dl_info->dlpi_name, baseAddress,
+                                         lowestStart, highestEnd));
+
+  return 0;
+}
+
+// Build the list of shared libraries loaded in the current process.
+//
+// The bulk of the entries comes from dl_iterate_phdr(). The process's maps
+// file is scanned beforehand for two platform-specific purposes: on Linux to
+// find the address at which the main executable is mapped (so that its name
+// can be attached to the nameless dl_iterate_phdr entry covering it), and on
+// Android to pick up the dalvik JIT code cache mapping.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+ SharedLibraryInfo info;
+
+#if defined(GP_OS_linux)
+ // We need to find the name of the executable (exeName, exeNameLen) and the
+ // address of its executable section (exeExeAddr) in the running image.
+ char exeName[PATH_MAX];
+ memset(exeName, 0, sizeof(exeName));
+
+ // The buffer was zeroed above and at most sizeof(exeName) - 1 bytes are
+ // requested, so exeName always ends up NUL-terminated.
+ ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1);
+ if (exeNameLen == -1) {
+ // readlink failed for whatever reason. Note this, but keep going.
+ exeName[0] = '\0';
+ exeNameLen = 0;
+ // LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed");
+ } else {
+ // Assert no buffer overflow.
+ MOZ_RELEASE_ASSERT(exeNameLen >= 0 &&
+ exeNameLen < static_cast<ssize_t>(sizeof(exeName)));
+ }
+
+ // Start address of an executable mapping whose path equals exeName; stays 0
+ // if the maps scan below finds none.
+ unsigned long exeExeAddr = 0;
+#endif
+
+#if defined(GP_OS_android)
+ // If dl_iterate_phdr doesn't exist, we give up immediately.
+ if (!dl_iterate_phdr) {
+ // On ARM Android, dl_iterate_phdr is provided by the custom linker.
+ // So if libxul was loaded by the system linker (e.g. as part of
+ // xpcshell when running tests), it won't be available and we should
+ // not call it.
+ return info;
+ }
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+ // Read info from /proc/self/maps. We ignore most of it.
+ pid_t pid = mozilla::baseprofiler::profiler_current_process_id().ToNumber();
+ char path[PATH_MAX];
+ SprintfLiteral(path, "/proc/%d/maps", pid);
+ std::ifstream maps(path);
+ std::string line;
+ while (std::getline(maps, line)) {
+ int ret;
+ unsigned long start;
+ unsigned long end;
+ char perm[6 + 1] = "";
+ unsigned long offset;
+ char modulePath[PATH_MAX + 1] = "";
+ // Parse: address range, permissions, offset, two fields that are read but
+ // discarded, and an optional path.
+ ret = sscanf(line.c_str(),
+ "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n",
+ &start, &end, perm, &offset, modulePath);
+ // |perm| was initialized to "", so this check is safe even if sscanf
+ // converted nothing.
+ if (!strchr(perm, 'x')) {
+ // Ignore non executable entries
+ continue;
+ }
+ // Lines with (5 conversions) and without (4 conversions) a trailing path
+ // are both accepted.
+ if (ret != 5 && ret != 4) {
+ // LOG("SharedLibraryInfo::GetInfoForSelf(): "
+ // "reading /proc/self/maps failed");
+ continue;
+ }
+
+# if defined(GP_OS_linux)
+ // Try to establish the main executable's load address.
+ if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) {
+ exeExeAddr = start;
+ }
+# elif defined(GP_OS_android)
+ // Use /proc/pid/maps to get the dalvik-jit section since it has no
+ // associated phdrs.
+ if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) {
+ info.AddSharedLibrary(
+ SharedLibraryAtPath(modulePath, start, end, offset));
+ // Sanity limit: stop scanning once an implausible number of entries has
+ // been collected.
+ if (info.GetSize() > 10000) {
+ // LOG("SharedLibraryInfo::GetInfoForSelf(): "
+ // "implausibly large number of mappings acquired");
+ break;
+ }
+ }
+# endif
+ }
+#endif
+
+ std::vector<LoadedLibraryInfo> libInfoList;
+
+ // We collect the bulk of the library info using dl_iterate_phdr.
+ dl_iterate_phdr(dl_iterate_callback, &libInfoList);
+
+ // Each object becomes one entry spanning from its first mapping's start to
+ // its last mapping's end; the offset passed is the distance of the first
+ // mapping from the object's base address.
+ for (const auto& libInfo : libInfoList) {
+ info.AddSharedLibrary(
+ SharedLibraryAtPath(libInfo.mName.c_str(), libInfo.mFirstMappingStart,
+ libInfo.mLastMappingEnd,
+ libInfo.mFirstMappingStart - libInfo.mBaseAddress));
+ }
+
+#if defined(GP_OS_linux)
+ // Make another pass over the information we just harvested from
+ // dl_iterate_phdr. If we see a nameless object mapped at what we earlier
+ // established to be the main executable's load address, attach the
+ // executable's name to that entry.
+ for (size_t i = 0; i < info.GetSize(); i++) {
+ SharedLibrary& lib = info.GetMutableEntry(i);
+ if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() &&
+ lib.GetDebugPath().empty()) {
+ // Rebuild the entry from the executable's path, keeping its address
+ // range and offset.
+ lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(),
+ lib.GetOffset());
+
+ // We only expect to see one such entry.
+ break;
+ }
+ }
+#endif
+
+ return info;
+}
+
+// This implementation needs no set-up, so there is nothing to do here.
+void SharedLibraryInfo::Initialize() {}
diff --git a/mozglue/baseprofiler/core/shared-libraries-macos.cc b/mozglue/baseprofiler/core/shared-libraries-macos.cc
new file mode 100644
index 0000000000..f995581001
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-macos.cc
@@ -0,0 +1,206 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilerSharedLibraries.h"
+
+#include "platform.h"
+
+#include "mozilla/Unused.h"
+#include <AvailabilityMacros.h>
+
+#include <dlfcn.h>
+#include <mach-o/arch.h>
+#include <mach-o/dyld_images.h>
+#include <mach-o/dyld.h>
+#include <mach-o/loader.h>
+#include <mach/mach_init.h>
+#include <mach/mach_traps.h>
+#include <mach/task_info.h>
+#include <mach/task.h>
+#include <sstream>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+
+// Architecture specific abstraction.
+// 32-bit x86 builds use the 32-bit Mach-O header, segment command, magic
+// number and segment load-command constant; every other architecture uses
+// the 64-bit variants. seg_size is the integer type used below to hold a
+// segment's vmsize.
+#if defined(GP_ARCH_x86)
+typedef mach_header platform_mach_header;
+typedef segment_command mach_segment_command_type;
+# define MACHO_MAGIC_NUMBER MH_MAGIC
+# define CMD_SEGMENT LC_SEGMENT
+# define seg_size uint32_t
+#else
+typedef mach_header_64 platform_mach_header;
+typedef segment_command_64 mach_segment_command_type;
+# define MACHO_MAGIC_NUMBER MH_MAGIC_64
+# define CMD_SEGMENT LC_SEGMENT_64
+# define seg_size uint64_t
+#endif
+
+// One image currently known to be loaded: the address of its Mach-O header
+// and the file name dladdr() reported for that address.
+struct NativeSharedLibrary {
+ const platform_mach_header* header;
+ std::string path;
+};
+// List of loaded images, maintained by the dyld add/remove image callbacks.
+// Null until SharedLibraryInfo::Initialize() allocates it; every access in
+// this file is made while holding a SharedLibrariesLock.
+static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr;
+
+// Scoped guard for the mutex that protects sSharedLibrariesList: the mutex is
+// locked when the guard is constructed and unlocked when it is destroyed.
+// Guards cannot be copied.
+class MOZ_RAII SharedLibrariesLock {
+  static mozilla::baseprofiler::detail::BaseProfilerMutex sSharedLibrariesMutex;
+
+ public:
+  SharedLibrariesLock() { sSharedLibrariesMutex.Lock(); }
+  SharedLibrariesLock(const SharedLibrariesLock&) = delete;
+  void operator=(const SharedLibrariesLock&) = delete;
+
+  ~SharedLibrariesLock() { sSharedLibrariesMutex.Unlock(); }
+};
+
+// Definition of the single mutex shared by all guards.
+mozilla::baseprofiler::detail::BaseProfilerMutex
+    SharedLibrariesLock::sSharedLibrariesMutex;
+
+// Image-added callback: record the image whose Mach-O header is |mh| in
+// sSharedLibrariesList, together with the file name dladdr() reports for it.
+// Nothing is recorded if dladdr() fails or the list has not been allocated.
+static void SharedLibraryAddImage(const struct mach_header* mh,
+                                  intptr_t vmaddr_slide) {
+  // NOTE: Presumably for backwards-compatibility reasons, this function accepts
+  // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast
+  // it to the right type here.
+  const auto* header = reinterpret_cast<const platform_mach_header*>(mh);
+
+  // Resolve the file name before taking the lock.
+  Dl_info info;
+  if (dladdr(header, &info) == 0) {
+    return;
+  }
+
+  SharedLibrariesLock lock;
+  if (sSharedLibrariesList) {
+    sSharedLibrariesList->push_back(
+        NativeSharedLibrary{header, info.dli_fname});
+  }
+}
+
+// Image-removed callback: drop the first entry of sSharedLibrariesList whose
+// header address is |mh|. Does nothing if the list has not been allocated or
+// contains no such entry.
+static void SharedLibraryRemoveImage(const struct mach_header* mh,
+                                     intptr_t vmaddr_slide) {
+  // NOTE: Presumably for backwards-compatibility reasons, this function accepts
+  // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast
+  // it to the right type here.
+  const auto* header = reinterpret_cast<const platform_mach_header*>(mh);
+
+  SharedLibrariesLock lock;
+  if (!sSharedLibrariesList) {
+    return;
+  }
+
+  for (auto it = sSharedLibrariesList->begin();
+       it != sSharedLibrariesList->end(); ++it) {
+    if (it->header == header) {
+      sSharedLibrariesList->erase(it);
+      return;
+    }
+  }
+}
+
+// Allocate the global image list and register the dyld callbacks that keep it
+// up to date as images are added and removed.
+// NOTE(review): the list is allocated before the callbacks are registered;
+// both callbacks ignore events while the list is still null, so this order
+// should be kept.
+void SharedLibraryInfo::Initialize() {
+ // NOTE: We intentionally leak this memory here. We're allocating dynamically
+ // in order to avoid static initializers.
+ sSharedLibrariesList = new std::vector<NativeSharedLibrary>();
+
+ _dyld_register_func_for_add_image(SharedLibraryAddImage);
+ _dyld_register_func_for_remove_image(SharedLibraryRemoveImage);
+}
+
+// Append to |info| an entry for the Mach-O image whose header is at |header|
+// and whose file is |path|.
+//
+// The entry starts at the header's address and extends for the vmsize of the
+// image's __TEXT segment (0 if no such segment is found). Its code id is the
+// image's LC_UUID as 32 uppercase hex digits and its breakpad id is that
+// string followed by the age '0'; both are empty when the image carries no
+// LC_UUID command. A null |path| is treated as an empty path.
+static void addSharedLibrary(const platform_mach_header* header,
+                             const char* path, SharedLibraryInfo& info) {
+  // The load commands follow the Mach-O header directly.
+  const struct load_command* cmd =
+      reinterpret_cast<const struct load_command*>(header + 1);
+
+  seg_size size = 0;
+  unsigned long long start = reinterpret_cast<unsigned long long>(header);
+  // Walk the load commands until both the __TEXT segment size and the UUID
+  // have been found, or the commands run out.
+  const uint8_t* uuid_bytes = nullptr;
+  for (unsigned int i = 0;
+       cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0);
+       ++i) {
+    if (cmd->cmd == CMD_SEGMENT) {
+      const mach_segment_command_type* seg =
+          reinterpret_cast<const mach_segment_command_type*>(cmd);
+
+      if (!strcmp(seg->segname, "__TEXT")) {
+        size = seg->vmsize;
+      }
+    } else if (cmd->cmd == LC_UUID) {
+      const uuid_command* ucmd = reinterpret_cast<const uuid_command*>(cmd);
+      uuid_bytes = ucmd->uuid;
+    }
+
+    // Each command records its own size; step over it to reach the next one.
+    cmd = reinterpret_cast<const struct load_command*>(
+        reinterpret_cast<const char*>(cmd) + cmd->cmdsize);
+  }
+
+  std::string uuid;
+  std::string breakpadId;
+  if (uuid_bytes != nullptr) {
+    static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+                                        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+    for (int i = 0; i < 16; ++i) {
+      uint8_t byte = uuid_bytes[i];
+      uuid += digits[byte >> 4];
+      uuid += digits[byte & 0xFu];
+    }
+
+    // Breakpad id is the same as the uuid but with the additional trailing 0
+    // for the breakpad id age.
+    breakpadId = uuid;
+    breakpadId += '0';
+  }
+
+  std::string pathStr = path ? path : "";
+
+  // The name is the last path component. Paths handled here use '/' as the
+  // separator; splitting on '\\' would leave the whole path in the name.
+  size_t pos = pathStr.rfind('/');
+  std::string nameStr =
+      (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr;
+
+  const NXArchInfo* archInfo =
+      NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype);
+
+  info.AddSharedLibrary(SharedLibrary(
+      start, start + size, 0, breakpadId, uuid, nameStr, pathStr, nameStr,
+      pathStr, std::string{}, archInfo ? archInfo->name : ""));
+}
+
+// Translate the statically stored sSharedLibrariesList information into a
+// SharedLibraryInfo object, then add an entry for dyld itself.
+//
+// If Initialize() has not been called yet the image list does not exist and
+// only the dyld entry (when available) is returned.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibrariesLock lock;
+  SharedLibraryInfo sharedLibraryInfo;
+
+  // Like the add/remove image callbacks, tolerate a list that has not been
+  // allocated instead of dereferencing a null pointer.
+  if (sSharedLibrariesList) {
+    for (const auto& info : *sSharedLibrariesList) {
+      addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo);
+    }
+  }
+
+  // Add the entry for dyld itself.
+  // We only support macOS 10.12+, which corresponds to dyld version 15+.
+  // dyld version 15 added the dyldPath property.
+  task_dyld_info_data_t task_dyld_info;
+  mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
+  if (task_info(mach_task_self(), TASK_DYLD_INFO,
+                reinterpret_cast<task_info_t>(&task_dyld_info),
+                &count) != KERN_SUCCESS) {
+    return sharedLibraryInfo;
+  }
+
+  const auto* aii = reinterpret_cast<const struct dyld_all_image_infos*>(
+      task_dyld_info.all_image_info_addr);
+  // Skip the dyld entry when the info block, or the path it should carry, is
+  // missing.
+  if (aii && aii->version >= 15 && aii->dyldPath) {
+    const platform_mach_header* header =
+        reinterpret_cast<const platform_mach_header*>(
+            aii->dyldImageLoadAddress);
+    addSharedLibrary(header, aii->dyldPath, sharedLibraryInfo);
+  }
+
+  return sharedLibraryInfo;
+}
diff --git a/mozglue/baseprofiler/core/shared-libraries-win32.cc b/mozglue/baseprofiler/core/shared-libraries-win32.cc
new file mode 100644
index 0000000000..2a453612d3
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-win32.cc
@@ -0,0 +1,198 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+#include "BaseProfilerSharedLibraries.h"
+
+#include "mozilla/glue/WindowsUnicode.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/WindowsEnumProcessModules.h"
+
+#include <cctype>
+#include <string>
+
+// Hexadecimal digit lookup tables, indexed by nibble value (0-15); used by
+// the AppendHex() overloads below.
+static constexpr char uppercaseDigits[16] = {'0', '1', '2', '3', '4', '5',
+ '6', '7', '8', '9', 'A', 'B',
+ 'C', 'D', 'E', 'F'};
+static constexpr char lowercaseDigits[16] = {'0', '1', '2', '3', '4', '5',
+ '6', '7', '8', '9', 'a', 'b',
+ 'c', 'd', 'e', 'f'};
+
+// Append the bytes in [aBegin, aEnd) to |aOut| as two uppercase hexadecimal
+// digits per byte, in memory order.
+static void AppendHex(const unsigned char* aBegin, const unsigned char* aEnd,
+                      std::string& aOut) {
+  const unsigned char* cursor = aBegin;
+  while (cursor < aEnd) {
+    const unsigned char byte = *cursor++;
+    aOut.push_back(uppercaseDigits[byte >> 4]);
+    aOut.push_back(uppercaseDigits[byte & 0xFu]);
+  }
+}
+
+// Named values for the two boolean parameters of the AppendHex() template.
+static constexpr bool WITH_PADDING = true;
+static constexpr bool WITHOUT_PADDING = false;
+static constexpr bool LOWERCASE = true;
+static constexpr bool UPPERCASE = false;
+
+// Append the integer |aValue| to |aOut| in hexadecimal, most significant
+// nibble first. With |aWithPadding| every nibble of T is written (two digits
+// per byte); without it leading zero nibbles are dropped, but at least one
+// digit is always written. |aLowercase| selects the digit case.
+template <typename T>
+static void AppendHex(T aValue, std::string& aOut, bool aWithPadding,
+                      bool aLowercase = UPPERCASE) {
+  const char* const digits = aLowercase ? lowercaseDigits : uppercaseDigits;
+
+  // Once true, every remaining nibble is written. Padding starts that way;
+  // otherwise it is switched on by the first non-zero nibble or, failing
+  // that, by the very last nibble (so we don't output a blank).
+  bool emitting = aWithPadding;
+  for (int i = sizeof(T) * 2 - 1; i >= 0; --i) {
+    const unsigned nibble = (aValue >> (i * 4)) & 0xFu;
+    if (nibble != 0 || i == 0) {
+      emitting = true;
+    }
+    if (emitting) {
+      aOut += digits[nibble];
+    }
+  }
+}
+
+// Returns true if the module file named |aModuleName| must not be loaded
+// again by this code. The name is compared against a fixed list in full
+// (same length, ASCII case-insensitively).
+static bool IsModuleUnsafeToLoad(const std::string& aModuleName) {
+  // std::tolower() has undefined behaviour for negative arguments other than
+  // EOF, and module names are UTF-8 so they may contain bytes >= 0x80 that
+  // are negative as plain char. Convert through unsigned char first.
+  auto LowerCaseEqualsLiteral = [](char aModuleChar, char aDetouredChar) {
+    return std::tolower(static_cast<unsigned char>(aModuleChar)) ==
+           aDetouredChar;
+  };
+
+  // Hackaround for Bug 1723868. There is no safe way to prevent the module
+  // Microsoft's VP9 Video Decoder from being unloaded because mfplat.dll may
+  // have posted more than one task to unload the module in the work queue
+  // without calling LoadLibrary.
+  constexpr std::string_view vp9_decoder_dll = "msvp9dec_store.dll";
+  if (std::equal(aModuleName.cbegin(), aModuleName.cend(),
+                 vp9_decoder_dll.cbegin(), vp9_decoder_dll.cend(),
+                 LowerCaseEqualsLiteral)) {
+    return true;
+  }
+
+  return false;
+}
+
+// Collect identification data (breakpad id, code id, PDB name and path,
+// version) for the module at |aModulePath| and append a SharedLibrary entry
+// for it to this object.
+//
+// |aModule| is the handle of the module when the caller has one; its value is
+// used as the start address of the entry. When it is Nothing, the handle
+// obtained by loading |aModulePath| here is used instead.
+//
+// Nothing is added if the path cannot be converted to UTF-8, the module is
+// one that is unsafe to load, loading it fails, or its PE headers or bounds
+// cannot be read.
+void SharedLibraryInfo::AddSharedLibraryFromModuleInfo(
+ const wchar_t* aModulePath, mozilla::Maybe<HMODULE> aModule) {
+ mozilla::UniquePtr<char[]> utf8ModulePath(
+ mozilla::glue::WideToUTF8(aModulePath));
+ if (!utf8ModulePath) {
+ return;
+ }
+
+ // The module name is the last path component; both '\' and '/' are accepted
+ // as separators.
+ std::string modulePathStr(utf8ModulePath.get());
+ size_t pos = modulePathStr.find_last_of("\\/");
+ std::string moduleNameStr = (pos != std::string::npos)
+ ? modulePathStr.substr(pos + 1)
+ : modulePathStr;
+
+ // If the module is unsafe to call LoadLibraryEx for, we skip.
+ if (IsModuleUnsafeToLoad(moduleNameStr)) {
+ return;
+ }
+
+ // Load the module again - to make sure that its handle will remain valid as
+ // we attempt to read the PDB information from it - or for the first time if
+ // we only have a path. We want to load the DLL without running the newly
+ // loaded module's DllMain function, but not as a data file because we want
+ // to be able to do RVA computations easily. Hence, we use the flag
+ // LOAD_LIBRARY_AS_IMAGE_RESOURCE which ensures that the sections (not PE
+ // headers) will be relocated by the loader. Otherwise GetPdbInfo() and/or
+ // GetVersionInfo() can cause a crash. If the original handle |aModule| is
+ // valid, LoadLibraryEx just increments its refcount.
+ nsModuleHandle handleLock(
+ ::LoadLibraryExW(aModulePath, NULL, LOAD_LIBRARY_AS_IMAGE_RESOURCE));
+ if (!handleLock) {
+ return;
+ }
+
+ mozilla::nt::PEHeaders headers(handleLock.get());
+ if (!headers) {
+ return;
+ }
+
+ mozilla::Maybe<mozilla::Range<const uint8_t>> bounds = headers.GetBounds();
+ if (!bounds) {
+ return;
+ }
+
+ // Put the original |aModule| into SharedLibrary, but we get debug info
+ // from |handleLock| as |aModule| might be inaccessible.
+ const uintptr_t modStart =
+ aModule.isSome() ? reinterpret_cast<uintptr_t>(*aModule)
+ : reinterpret_cast<uintptr_t>(handleLock.get());
+ const uintptr_t modEnd = modStart + bounds->length();
+
+ std::string breakpadId;
+ std::string pdbPathStr;
+ std::string pdbNameStr;
+ if (const auto* debugInfo = headers.GetPdbInfo()) {
+ MOZ_ASSERT(breakpadId.empty());
+ // Breakpad id: the PDB signature GUID as zero-padded uppercase hex
+ // (Data1, Data2, Data3, then the Data4 bytes in order), followed by the
+ // PDB age in uppercase hex without leading zeroes.
+ const GUID& pdbSig = debugInfo->pdbSignature;
+ AppendHex(pdbSig.Data1, breakpadId, WITH_PADDING);
+ AppendHex(pdbSig.Data2, breakpadId, WITH_PADDING);
+ AppendHex(pdbSig.Data3, breakpadId, WITH_PADDING);
+ AppendHex(reinterpret_cast<const unsigned char*>(&pdbSig.Data4),
+ reinterpret_cast<const unsigned char*>(&pdbSig.Data4) +
+ sizeof(pdbSig.Data4),
+ breakpadId);
+ AppendHex(debugInfo->pdbAge, breakpadId, WITHOUT_PADDING);
+
+ // The PDB file name could be different from module filename,
+ // so report both
+ // e.g. The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb
+ pdbPathStr = debugInfo->pdbFileName;
+ // Note: this |pos| shadows the one declared for the module path above.
+ size_t pos = pdbPathStr.find_last_of("\\/");
+ pdbNameStr =
+ (pos != std::string::npos) ? pdbPathStr.substr(pos + 1) : pdbPathStr;
+ }
+
+ // Code id: the PE timestamp as zero-padded uppercase hex followed by the
+ // image size as lowercase hex without leading zeroes. Left empty unless
+ // both values are available.
+ std::string codeId;
+ DWORD timestamp;
+ DWORD imageSize;
+ if (headers.GetTimeStamp(timestamp) && headers.GetImageSize(imageSize)) {
+ AppendHex(timestamp, codeId, WITH_PADDING);
+ AppendHex(imageSize, codeId, WITHOUT_PADDING, LOWERCASE);
+ }
+
+ // Version string: the four 16-bit fields of |version|, most significant
+ // first, in decimal and separated by dots. Left empty when unavailable.
+ std::string versionStr;
+ uint64_t version;
+ if (headers.GetVersionInfo(version)) {
+ versionStr += std::to_string((version >> 48) & 0xFFFF);
+ versionStr += '.';
+ versionStr += std::to_string((version >> 32) & 0xFFFF);
+ versionStr += '.';
+ versionStr += std::to_string((version >> 16) & 0xFFFF);
+ versionStr += '.';
+ versionStr += std::to_string(version & 0xFFFF);
+ }
+
+ SharedLibrary shlib(modStart, modEnd,
+ 0, // DLLs are always mapped at offset 0 on Windows
+ breakpadId, codeId, moduleNameStr, modulePathStr,
+ pdbNameStr, pdbPathStr, versionStr, "");
+ AddSharedLibrary(shlib);
+}
+
+// Build the list of modules of the current process by enumerating them and
+// adding one entry per module reported.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibraryInfo result;
+
+  // Invoked once per enumerated module with its path and handle.
+  auto onModule = [&result](const wchar_t* aModulePath, HMODULE aModule) {
+    result.AddSharedLibraryFromModuleInfo(aModulePath,
+                                          mozilla::Some(aModule));
+  };
+  mozilla::EnumerateProcessModules(onModule);
+
+  return result;
+}
+
+// Build a list holding at most one entry: the module file at |aPath|, for
+// which no existing module handle is supplied. The list is empty if the
+// module's information could not be collected.
+SharedLibraryInfo SharedLibraryInfo::GetInfoFromPath(const wchar_t* aPath) {
+  SharedLibraryInfo result;
+  result.AddSharedLibraryFromModuleInfo(aPath, mozilla::Nothing());
+  return result;
+}
+
+// This implementation needs no set-up, so there is nothing to do here.
+void SharedLibraryInfo::Initialize() {}
diff --git a/mozglue/baseprofiler/core/vtune/ittnotify.h b/mozglue/baseprofiler/core/vtune/ittnotify.h
new file mode 100644
index 0000000000..04adf9eb5e
--- /dev/null
+++ b/mozglue/baseprofiler/core/vtune/ittnotify.h
@@ -0,0 +1,4127 @@
+// clang-format off
+
+/* <copyright>
+ This file is provided under a dual BSD/GPLv2 license. When using or
+ redistributing this file, you may do so under either license.
+
+ GPL LICENSE SUMMARY
+
+ Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of version 2 of the GNU General Public License as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ The full GNU General Public License is included in this distribution
+ in the file called LICENSE.GPL.
+
+ Contact Information:
+ http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
+
+ BSD LICENSE
+
+ Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+</copyright> */
+#ifndef _ITTNOTIFY_H_
+# define _ITTNOTIFY_H_
+
+/**
+@file
+@brief Public User API functions and types
+@mainpage
+
+The ITT API is used to annotate a user's program with additional information
+that can be used by correctness and performance tools. The user inserts
+calls in their program. Those calls generate information that is collected
+at runtime, and used by Intel(R) Threading Tools.
+
+@section API Concepts
+The following general concepts are used throughout the API.
+
+@subsection Unicode Support
+Many API functions take character string arguments. On Windows, there
+are two versions of each such function. The function name is suffixed
+by W if Unicode support is enabled, and by A otherwise. Any API function
+that takes a character string argument adheres to this convention.
+
+@subsection Conditional Compilation
+Many users prefer having an option to modify ITT API code when linking it
+inside their runtimes. ITT API header file provides a mechanism to replace
+ITT API function names inside your code with empty strings. To do this,
+define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
+static library from the linker script.
+
+@subsection Domains
+[see domains]
+Domains provide a way to separate notification for different modules or
+libraries in a program. Domains are specified by dotted character strings,
+e.g. TBB.Internal.Control.
+
+A mechanism (to be specified) is provided to enable and disable
+domains. By default, all domains are enabled.
+@subsection Named Entities and Instances
+Named entities (frames, regions, tasks, and markers) communicate
+information about the program to the analysis tools. A named entity often
+refers to a section of program code, or to some set of logical concepts
+that the programmer wants to group together.
+
+Named entities relate to the programmer's static view of the program. When
+the program actually executes, many instances of a given named entity
+may be created.
+
+The API annotations denote instances of named entities. The actual
+named entities are displayed using the analysis tools. In other words,
+the named entities come into existence when instances are created.
+
+Instances of named entities may have instance identifiers (IDs). Some
+API calls use instance identifiers to create relationships between
+different instances of named entities. Other API calls associate data
+with instances of named entities.
+
+Some named entities must always have instance IDs. In particular, regions
+and frames always have IDs. Tasks and markers need IDs only if the ID is
+needed in another API call (such as adding a relation or metadata).
+
+The lifetime of instance IDs is distinct from the lifetime of
+instances. This allows various relationships to be specified separate
+from the actual execution of instances. This flexibility comes at the
+expense of extra API calls.
+
+The same ID may not be reused for different instances, unless a previous
+[ref] __itt_id_destroy call for that ID has been issued.
+*/
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+# define ITT_OS_FREEBSD 4
+#endif /* ITT_OS_FREEBSD */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# elif defined( __FreeBSD__ )
+# define ITT_OS ITT_OS_FREEBSD
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+# define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# elif ITT_OS==ITT_OS_FREEBSD
+# define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define ITTAPI_CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define ITTAPI_CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define ITTAPI_CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI ITTAPI_CDECL
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# include "vtune/legacy/ittnotify.h"
+#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR 3
+#define ITT_MINOR 0
+
+/* Standard versioning of a token with major and minor version numbers */
+#define ITT_VERSIONIZE(x) \
+ ITT_JOIN(x, \
+ ITT_JOIN(_, \
+ ITT_JOIN(ITT_MAJOR, \
+ ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+# define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+# define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
+
+#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args) \
+ typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
+ extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup public Public API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup control Collection Control
+ * @ingroup public
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing occurs not only for the current thread but for the whole process as well as spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ * - Does not analyze or report errors that involve memory access.
+ * - Other errors are reported as usual. Pausing data collection in
+ * Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ * only pauses tracing and analyzing memory access.
+ * It does not pause tracing or analyzing threading APIs.
+ * .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Does continue to record when new threads are started.
+ * .
+ * - Other effects:
+ * - Possible reduction of runtime overhead.
+ * .
+ * @{
+ */
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+/** @brief Detach collection */
+void ITTAPI __itt_detach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause, (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+ITT_STUBV(ITTAPI, void, detach, (void))
+#define __itt_pause ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
+#define __itt_resume ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#define __itt_detach ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr 0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#define __itt_detach()
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr 0
+#define __itt_resume_ptr 0
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} control group */
+/** @endcond */
+
+/**
+ * @defgroup threads Threads
+ * @ingroup public
+ * Give names to threads
+ * @{
+ */
+/**
+ * @brief Sets thread name of calling thread
+ * @param[in] name - name of thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_thread_set_nameA(const char *name);
+void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_thread_set_name __itt_thread_set_nameW
+# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
+#else /* UNICODE */
+# define __itt_thread_set_name __itt_thread_set_nameA
+# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_thread_set_name(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name))
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA)
+#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
+#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW)
+#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name)
+#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA(name)
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW(name)
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name(name)
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void ITTAPI __itt_thread_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, thread_ignore, (void))
+#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore)
+#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thread_ignore()
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} threads group */
+
+/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_suppress_mode
+ * @brief Mode argument of the range calls: mark a memory range for error suppression or for unsuppression
+ */
+typedef enum __itt_suppress_mode {
+ __itt_unsuppress_range,
+ __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_mark_range(mode, mask, address, size) /* no-op stub; takes the same four arguments as the real call */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
+ * call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_clear_range(mode, mask, address, size) /* no-op stub; takes the same four arguments as the real call */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
+
+/**
+ * @defgroup sync Synchronization
+ * @ingroup public
+ * Indicate user-written synchronization code
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex 2
+
+/**
+@brief Name a synchronization object
+@param[in] addr Handle for the synchronization object. You should
+use a real address to uniquely identify the synchronization object.
+@param[in] objtype null-terminated object type string. If NULL is
+passed, the name will be "User Synchronization".
+@param[in] objname null-terminated object name string. If NULL,
+no name will be assigned to the object.
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex]
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute);
+void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_create __itt_sync_createW
+# define __itt_sync_create_ptr __itt_sync_createW_ptr
+#else /* UNICODE */
+# define __itt_sync_create __itt_sync_createA
+# define __itt_sync_create_ptr __itt_sync_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA)
+#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
+#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW)
+#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create ITTNOTIFY_VOID(sync_create)
+#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA(addr, objtype, objname, attribute)
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW(addr, objtype, objname, attribute)
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create(addr, objtype, objname, attribute)
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+@brief Rename a synchronization object
+
+You can use the rename call to assign or reassign a name to a given
+synchronization object.
+@param[in] addr handle for the synchronization object.
+@param[in] name null-terminated object name string.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_renameA(void *addr, const char *name);
+void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_rename __itt_sync_renameW
+# define __itt_sync_rename_ptr __itt_sync_renameW_ptr
+#else /* UNICODE */
+# define __itt_sync_rename __itt_sync_renameA
+# define __itt_sync_rename_ptr __itt_sync_renameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_rename(void *addr, const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name))
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA)
+#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
+#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW)
+#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename)
+#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA(addr, name)
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW(addr, name)
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename(addr, name)
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ @brief Destroy a synchronization object.
+ @param addr Handle for the synchronization object.
+ */
+void ITTAPI __itt_sync_destroy(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
+#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy)
+#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_destroy(addr)
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/*****************************************************************//**
+ * @name group of functions used for performance measurement tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @brief Enter spin loop on user-defined sync object
+ */
+void ITTAPI __itt_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
+#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare)
+#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_prepare(addr)
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Quit spin loop without acquiring spin object
+ */
+void ITTAPI __itt_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
+#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel)
+#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_cancel(addr)
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Successful spin loop completion (sync object acquired)
+ */
+void ITTAPI __itt_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
+#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired)
+#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_acquired(addr)
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Start sync object releasing code. Is called before the lock release call.
+ */
+void ITTAPI __itt_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
+#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing)
+#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_releasing(addr)
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/** @} sync group */
+
+/**************************************************************//**
+ * @name group of functions used for correctness checking tools
+ ******************************************************************/
+/** @{ */
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * - When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing and
+ * __itt_fsync_prepare_ptr is 0; when INTEL_NO_MACRO_BODY is defined only
+ * __itt_fsync_prepare_ptr (0) is defined.
+ * @see void __itt_sync_prepare(void* addr);
+ */
+void ITTAPI __itt_fsync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
+#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare)
+#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_prepare(addr)
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * - When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing and
+ * __itt_fsync_cancel_ptr is 0; when INTEL_NO_MACRO_BODY is defined only
+ * __itt_fsync_cancel_ptr (0) is defined.
+ * @see void __itt_sync_cancel(void *addr);
+ */
+void ITTAPI __itt_fsync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
+#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel)
+#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_cancel(addr)
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * - When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing and
+ * __itt_fsync_acquired_ptr is 0; when INTEL_NO_MACRO_BODY is defined only
+ * __itt_fsync_acquired_ptr (0) is defined.
+ * @see void __itt_sync_acquired(void *addr);
+ */
+void ITTAPI __itt_fsync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
+#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired)
+#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_acquired(addr)
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * - When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing and
+ * __itt_fsync_releasing_ptr is 0; when INTEL_NO_MACRO_BODY is defined only
+ * __itt_fsync_releasing_ptr (0) is defined.
+ * @see void __itt_sync_releasing(void* addr);
+ */
+void ITTAPI __itt_fsync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
+#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing)
+#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_releasing(addr)
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/**
+ * @defgroup model Modeling by Intel(R) Parallel Advisor
+ * @ingroup public
+ * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
+ * This API is called ONLY using annotate.h, by "Annotation" macros
+ * the user places in their sources during the parallelism modeling steps.
+ *
+ * site_begin/end and task_begin/end take the address of handle variables,
+ * which are writeable by the API. Handles must be 0 initialized prior
+ * to the first call to begin, or may cause a run-time failure.
+ * The handles are initialized in a multi-thread safe way by the API if
+ * the handle is 0. The commonly expected idiom is one static handle to
+ * identify a site or task. If a site or task of the same name has already
+ * been started during this collection, the same handle MAY be returned,
+ * but is not required to be - it is unspecified if data merging is done
+ * based on name. These routines also take an instance variable. Like
+ * the lexical instance, these must be 0 initialized. Unlike the lexical
+ * instance, this is used to track a single dynamic instance.
+ *
+ * API used by the Intel(R) Parallel Advisor to describe potential concurrency
+ * and related activities. User-added source annotations expand to calls
+ * to these procedures to enable modeling of a hypothetical concurrent
+ * execution serially.
+ * @{
+ */
+#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
+
+typedef void* __itt_model_site; /*!< @brief handle for lexical site */
+typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */
+typedef void* __itt_model_task; /*!< @brief handle for lexical task */
+typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ *
+ * Values of this type are the argument of __itt_model_disable_push().
+ * The handle typedefs above and this enum are only declared when
+ * _ADVISOR_ANNOTATE_H_ is not defined or ANNOTATE_EXPAND_NULL is defined;
+ * presumably annotate.h provides them otherwise (TODO confirm).
+ */
+typedef enum {
+ __itt_model_disable_observation, /*!< disable observations (implicit value 0) */
+ __itt_model_disable_collection /*!< disable collection (implicit value 1) */
+} __itt_model_disable;
+
+#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
+
+/**
+ * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
+ *
+ * site_begin/end model a potential concurrency site.
+ * site instances may be recursively nested with themselves.
+ * site_end exits the most recently started but unended site for the current
+ * thread. The handle passed to end may be used to validate structure.
+ * Instances of a site encountered on different threads concurrently
+ * are considered completely distinct. If the site name for two different
+ * lexical sites match, it is unspecified whether they are treated as the
+ * same or different for data presentation.
+ *
+ * The A/AL/W begin variants take only the site name (AL with an explicit
+ * length, W as a wide string and declared on Windows only); the _2 end
+ * variant takes no arguments.
+ *
+ * @param site     address of the lexical site handle (see the model group
+ *                 description: handles must be 0 initialized before first use)
+ * @param instance address of the dynamic instance handle (0 initialized)
+ * @param name     site name
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined every call macro in this block
+ *       expands to nothing and every *_ptr macro is 0; when
+ *       INTEL_NO_MACRO_BODY is defined only the *_ptr macros (0) are defined.
+ */
+void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_site_beginW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
+void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
+void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
+ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
+#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
+#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
+#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
+#endif
+#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
+#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
+#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
+#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
+#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_site_begin(site, instance, name)
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW(name)
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL(name, siteNameLen)
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end(site, instance)
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
+ *
+ * task_begin/end model a potential task, which is contained within the most
+ * closely enclosing dynamic site. task_end exits the most recently started
+ * but unended task. The handle passed to end may be used to validate
+ * structure. It is unspecified if bad dynamic nesting is detected. If it
+ * is, it should be encoded in the resulting data collection. The collector
+ * should not fail due to construct nesting issues, nor attempt to directly
+ * indicate the problem.
+ *
+ * The A/AL/W variants take only the task name (AL with an explicit length,
+ * W as a wide string and declared on Windows only); the _2 end variant takes
+ * no arguments. The __itt_model_iteration_task* entry points are declared
+ * here as well. NOTE(review): their semantics are not described in this
+ * section; confirm against the ITT API documentation.
+ *
+ * @param task     address of the lexical task handle (see the model group
+ *                 description: handles must be 0 initialized before first use)
+ * @param instance address of the dynamic instance handle (0 initialized)
+ * @param name     task name
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined every call macro in this block
+ *       expands to nothing and every *_ptr macro is 0; when
+ *       INTEL_NO_MACRO_BODY is defined only the *_ptr macros (0) are defined.
+ *       This includes __itt_model_iteration_taskW on Windows, so all declared
+ *       entry points have the same set of fallbacks.
+ */
+void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
+void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void))
+#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin)
+#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW)
+#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
+#endif
+#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
+#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL)
+#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
+#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end)
+#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_task_begin(task, instance, name)
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW(name)
+#define __itt_model_task_beginW_ptr 0
+#define __itt_model_iteration_taskW(name)
+#define __itt_model_iteration_taskW_ptr 0
+#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL(name, taskNameLen)
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL(name, taskNameLen)
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end(task, instance)
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW_ptr 0
+#define __itt_model_iteration_taskW_ptr 0
+#endif
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
+ *
+ * lock_acquire/release model a potential lock for both lockset and
+ * performance modeling. Each unique address is modeled as a separate
+ * lock, with invalid addresses being valid lock IDs. Specifically:
+ * no storage is accessed by the API at the specified address - it is only
+ * used for lock identification. Lock acquires may be self-nested and are
+ * unlocked by a corresponding number of releases.
+ * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
+ * but may not have identical semantics.)
+ *
+ * @param lock address used purely as the lock identifier (never dereferenced,
+ *             per the description above)
+ *
+ * NOTE(review): how the _2 variants differ from the base calls is not
+ * described in this section; confirm against the ITT API documentation.
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined every call macro in this block
+ *       expands to nothing and every *_ptr macro is 0; when
+ *       INTEL_NO_MACRO_BODY is defined only the *_ptr macros (0) are defined.
+ */
+void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
+void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
+#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire)
+#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
+#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release)
+#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_lock_acquire(lock)
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release(lock)
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
+ *
+ * record_allocation/deallocation describe user-defined memory allocator
+ * behavior, which may be required for correctness modeling to understand
+ * when storage is not expected to be actually reused across threads.
+ *
+ * @param addr presumably the start address of the storage being described
+ *             (TODO confirm)
+ * @param size presumably the size of that storage in bytes; taken by
+ *             record_allocation only (TODO confirm units)
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined both call macros expand to
+ *       nothing and both *_ptr macros are 0; when INTEL_NO_MACRO_BODY is
+ *       defined only the *_ptr macros (0) are defined.
+ */
+void ITTAPI __itt_model_record_allocation (void *addr, size_t size);
+void ITTAPI __itt_model_record_deallocation(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size))
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
+#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation)
+#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation)
+#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation)
+#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_record_allocation(addr, size)
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation(addr)
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_INDUCTION_USES support
+ *
+ * Note particular storage is inductive through the end of the current site
+ *
+ * @param addr presumably the start address of the storage (TODO confirm)
+ * @param size presumably the size of the storage in bytes (TODO confirm)
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_model_induction_uses_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_model_induction_uses_ptr (0) is defined.
+ */
+void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
+#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses)
+#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_induction_uses(addr, size)
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_REDUCTION_USES support
+ *
+ * Note particular storage is used for reduction through the end
+ * of the current site
+ *
+ * @param addr presumably the start address of the storage (TODO confirm)
+ * @param size presumably the size of the storage in bytes (TODO confirm)
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_model_reduction_uses_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_model_reduction_uses_ptr (0) is defined.
+ */
+void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
+#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses)
+#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_reduction_uses(addr, size)
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_OBSERVE_USES support
+ *
+ * Have correctness modeling record observations about uses of storage
+ * through the end of the current site
+ *
+ * @param addr presumably the start address of the storage (TODO confirm)
+ * @param size presumably the size of the storage in bytes (TODO confirm)
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_model_observe_uses_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_model_observe_uses_ptr (0) is defined.
+ */
+void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
+#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses)
+#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_observe_uses(addr, size)
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_CLEAR_USES support
+ *
+ * Clear the special handling of a piece of storage related to induction,
+ * reduction or observe_uses
+ *
+ * @param addr presumably the same address previously passed to
+ *             __itt_model_induction_uses, __itt_model_reduction_uses or
+ *             __itt_model_observe_uses (TODO confirm)
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_model_clear_uses_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_model_clear_uses_ptr (0) is defined.
+ */
+void ITTAPI __itt_model_clear_uses(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
+#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses)
+#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_clear_uses(addr)
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
+ *
+ * disable_push/disable_pop push and pop disabling based on a parameter.
+ * Disabling observations stops processing of memory references during
+ * correctness modeling, and all annotations that occur in the disabled
+ * region. This allows description of code that is expected to be handled
+ * specially during conversion to parallelism or that is not recognized
+ * by tools (e.g. some kinds of synchronization operations.)
+ * This mechanism causes all annotations in the disabled region, other
+ * than disable_push and disable_pop, to be ignored. (For example, this
+ * might validly be used to disable an entire parallel site and the contained
+ * tasks and locking in it for data collection purposes.)
+ * The disable for collection is a more expensive operation, but reduces
+ * collector overhead significantly. This applies to BOTH correctness data
+ * collection and performance data collection. For example, a site
+ * containing a task might only enable data collection for the first 10
+ * iterations. Both performance and correctness data should reflect this,
+ * and the program should run as close to full speed as possible when
+ * collection is disabled.
+ *
+ * For __itt_model_disable_push, @p x is a __itt_model_disable value selecting
+ * the disable method. NOTE(review): __itt_model_aggregate_task(size_t x) is
+ * declared in this block but not described here; confirm its meaning against
+ * the ITT API documentation.
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined every call macro in this block
+ *       expands to nothing and every *_ptr macro is 0; when
+ *       INTEL_NO_MACRO_BODY is defined only the *_ptr macros (0) are defined.
+ */
+void ITTAPI __itt_model_disable_push(__itt_model_disable x);
+void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
+#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push)
+#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
+#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop)
+#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_disable_push(x)
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop()
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} model group */
+
+/**
+ * @defgroup heap Heap
+ * @ingroup public
+ * Heap group
+ * @{
+ */
+
+typedef void* __itt_heap_function; /*!< @brief handle returned by __itt_heap_function_create and passed as @c h to the __itt_heap_* calls */
+
+/**
+ * @brief Create an identification for heap function
+ * @param name   name string; presumably the name of the heap function being
+ *               identified (TODO confirm)
+ * @param domain domain string; NOTE(review): its meaning is not described in
+ *               this section
+ * @return non-zero identifier or NULL
+ *
+ * On Windows, narrow (A) and wide (W) variants are declared and
+ * __itt_heap_function_create maps to the W variant when UNICODE or _UNICODE
+ * is defined, otherwise to the A variant. Elsewhere a single
+ * __itt_heap_function_create taking narrow strings is declared.
+ *
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call macros evaluate to
+ *       (__itt_heap_function)0 and the *_ptr macros are 0; when
+ *       INTEL_NO_MACRO_BODY is defined only the *_ptr macros (0) are defined.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain);
+__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_heap_function_create __itt_heap_function_createW
+# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
+#else
+# define __itt_heap_function_create __itt_heap_function_createA
+# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain))
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA)
+#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
+#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW)
+#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create)
+#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation begin occurrence.
+ * @param h           heap function handle (type __itt_heap_function)
+ * @param size        presumably the requested allocation size in bytes
+ *                    (TODO confirm)
+ * @param initialized int flag; NOTE(review): presumably non-zero when the
+ *                    allocator initializes the memory - confirm
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_heap_allocate_begin_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_heap_allocate_begin_ptr (0) is defined.
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation end occurrence.
+ * @param h           heap function handle (type __itt_heap_function)
+ * @param addr        pointer to a pointer; presumably the location holding the
+ *                    address of the allocated block (TODO confirm)
+ * @param size        presumably the allocation size in bytes (TODO confirm)
+ * @param initialized int flag; NOTE(review): presumably non-zero when the
+ *                    allocator initialized the memory - confirm
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_heap_allocate_end_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_heap_allocate_end_ptr (0) is defined.
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free begin occurrence.
+ * @param h    heap function handle (type __itt_heap_function)
+ * @param addr presumably the address of the block being freed (TODO confirm)
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_heap_free_begin_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_heap_free_begin_ptr (0) is defined.
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free end occurrence.
+ * @param h    heap function handle (type __itt_heap_function)
+ * @param addr presumably the address of the block that was freed (TODO confirm)
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_heap_free_end_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_heap_free_end_ptr (0) is defined.
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation begin occurrence.
+ * @param h           heap function handle (type __itt_heap_function)
+ * @param addr        presumably the address of the block being reallocated
+ *                    (TODO confirm)
+ * @param new_size    presumably the requested new size in bytes (TODO confirm)
+ * @param initialized int flag; NOTE(review): presumably non-zero when the
+ *                    allocator initializes the memory - confirm
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_heap_reallocate_begin_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_heap_reallocate_begin_ptr (0) is defined.
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ * @param h           heap function handle (type __itt_heap_function)
+ * @param addr        presumably the address of the original block
+ *                    (TODO confirm)
+ * @param new_addr    pointer to a pointer; presumably the location holding the
+ *                    address of the reallocated block (TODO confirm)
+ * @param new_size    presumably the new size in bytes (TODO confirm)
+ * @param initialized int flag; NOTE(review): presumably non-zero when the
+ *                    allocator initialized the memory - confirm
+ * @note When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing
+ *       and __itt_heap_reallocate_end_ptr is 0; when INTEL_NO_MACRO_BODY is
+ *       defined only __itt_heap_reallocate_end_ptr (0) is defined.
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end)
+#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access begin */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
+#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin)
+#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_begin()
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access end */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end)
+#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_end()
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief record memory growth begin */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
+#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief record memory growth end */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/** @brief Reset heap detection for the kinds selected by reset_mask (presumably __itt_heap_leaks and/or __itt_heap_growth -- confirm). */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask))
+#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reset_detection(reset_mask) /* no-op stub; takes the same single argument as the real API so call sites still preprocess */
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Record (report) heap data for the kinds selected by record_mask (presumably __itt_heap_leaks and/or __itt_heap_growth -- confirm). */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record(record_mask) /* no-op stub; takes the same single argument as the real API so call sites still preprocess */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
+/* ========================================================================== */
+
+/**
+ * @defgroup domains Domains
+ * @ingroup public
+ * Domains group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_domain
+{
+ volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW; /*!< Untyped stand-in for the wide-character name pointer when UNICODE/_UNICODE is not defined; the member exists in both configurations. */
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_domain* next; /*!< Pointer to another domain record; presumably chains the domains known to the runtime (confirm). */
+} __itt_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup domains
+ * @brief Create a domain.
+ * Create domain using some domain name: the URI naming style is recommended.
+ * Because the set of domains is expected to be static over the application's
+ * execution time, there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of
+ * which thread created the domain. This call is thread-safe.
+ * @param[in] name name of domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_domain* ITTAPI __itt_domain_createA(const char *name);
+__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_domain_create __itt_domain_createW
+# define __itt_domain_create_ptr __itt_domain_createW_ptr
+#else /* UNICODE */
+# define __itt_domain_create __itt_domain_createA
+# define __itt_domain_create_ptr __itt_domain_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_domain* ITTAPI __itt_domain_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA)
+#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
+#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW)
+#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create ITTNOTIFY_DATA(domain_create)
+#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA(name) (__itt_domain*)0
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW(name) (__itt_domain*)0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create(name) (__itt_domain*)0
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} domains group */
+
+/**
+ * @defgroup ids IDs
+ * @ingroup public
+ * IDs group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_id
+{
+ unsigned long long d1, d2, d3; /*!< As filled by __itt_id_make: d1 = object address, d2 = extra identifying data, d3 = reserved (zero). */
+} __itt_id;
+
+#pragma pack(pop)
+/** @endcond */
+
+static const __itt_id __itt_null = { 0, 0, 0 }; /* all-zero "no ID" value; static so every C translation unit including this header gets a private copy instead of a duplicate external definition */
+
+/**
+ * @ingroup ids
+ * @brief Convenience helper that fills in an __itt_id structure without domain control.
+ * It only initializes the structure and does not affect the collector runtime in any way.
+ * An ID made here must still be created with the __itt_id_create function before it is
+ * used to identify a named entity.
+ * Field d3 is reserved and is always set to zero.
+ * @param[in] addr The address of the object; high QWORD of the ID value (stored in d1).
+ * @param[in] extra Extra data to uniquely identify the object; low QWORD of the ID value (stored in d2).
+ */
+
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+{
+ __itt_id made;
+ made.d3 = 0ULL; /* Reserved. Must be zero */
+ made.d2 = extra;
+ made.d1 = (unsigned long long)(uintptr_t)addr;
+ return made;
+}
+
+/**
+ * @ingroup ids
+ * @brief Create an instance of identifier.
+ * This establishes the beginning of the lifetime of an instance of
+ * the given ID in the trace. Once this lifetime starts, the ID
+ * can be used to tag named entity instances in calls such as
+ * __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * Instance IDs are not domain specific!
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
+#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create(domain,id)
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup ids
+ * @brief Destroy an instance of identifier.
+ * This ends the lifetime of the current instance of the given ID value in the trace.
+ * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
+#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_destroy(domain,id)
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} ids group */
+
+/**
+ * @defgroup handles String Handles
+ * @ingroup public
+ * String Handles group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_string_handle
+{
+ const char* strA; /*!< Copy of original string in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* strW; /*!< Copy of original string in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* strW; /*!< Untyped stand-in for the wide-character string pointer when UNICODE/_UNICODE is not defined; the member exists in both configurations. */
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_string_handle* next; /*!< Pointer to another string handle record; presumably chains the handles known to the runtime (confirm). */
+} __itt_string_handle;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup handles
+ * @brief Create a string handle.
+ * Create and return handle value that can be associated with a string.
+ * Consecutive calls to __itt_string_handle_create with the same name
+ * return the same value. Because the set of string handles is expected to remain
+ * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created
+ * the string handle. This call is thread-safe.
+ * @param[in] name The input string
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name);
+__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_string_handle_create __itt_string_handle_createW
+# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
+#else /* UNICODE */
+# define __itt_string_handle_create __itt_string_handle_createA
+# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA)
+#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
+#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW)
+#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create)
+#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA(name) (__itt_string_handle*)0
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW(name) (__itt_string_handle*)0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create(name) (__itt_string_handle*)0
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} handles group */
+
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp;
+/** @endcond */
+
+#define __itt_timestamp_none ((__itt_timestamp)-1LL)
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to the current moment.
+ * This returns the timestamp in the format that is the most relevant for the current
+ * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
+ * compare __itt_timestamp values.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_get_timestamp() ((__itt_timestamp)0) /* value-returning API: the stub must still expand to an expression, like the other data stubs */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @defgroup regions Regions
+ * @ingroup public
+ * Regions group
+ * @{
+ */
+/**
+ * @ingroup regions
+ * @brief Begin of region instance.
+ * Successive calls to __itt_region_begin with the same ID are ignored
+ * until a call to __itt_region_end with the same ID
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance. Must not be __itt_null
+ * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
+ * @param[in] name The name of this region
+ */
+void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup regions
+ * @brief End of region instance.
+ * The first call to __itt_region_end with a given ID ends the
+ * region. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance
+ */
+void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id))
+#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
+#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin)
+#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x)
+#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_region_begin(d,x,y,z)
+#define __itt_region_begin_ptr 0
+#define __itt_region_end(d,x)
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_region_begin_ptr 0
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} regions group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup public
+ * Frames are similar to regions, but are intended to be easier to use and to implement.
+ * In particular:
+ * - Frames always represent periods of elapsed time
+ * - By default, frames have no nesting relationships
+ * @{
+ */
+
+/**
+ * @ingroup frames
+ * @brief Begin a frame instance.
+ * Successive calls to __itt_frame_begin with the
+ * same ID are ignored until a call to __itt_frame_end with the same ID.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ */
+void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief End a frame instance.
+ * The first call to __itt_frame_end with a given ID
+ * ends the frame. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_frame_begin call.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL for current
+ */
+void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing special __itt_timestamp_none value as "end" argument means
+ * take the current timestamp as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+ __itt_timestamp begin, __itt_timestamp end);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
+#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
+#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3)
+#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
+#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_frame_begin_v3(domain,id)
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3(domain,id)
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+/** @endcond */
+
+/**
+ * @defgroup taskgroup Task Group
+ * @ingroup public
+ * Task Group
+ * @{
+ */
+/**
+ * @ingroup taskgroup
+ * @brief Denotes a task_group instance.
+ * Successive calls to __itt_task_group with the same ID are ignored.
+ * @param[in] domain The domain for this task_group instance
+ * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
+ * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_group(d,x,y,z)
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or task
+ * migration (the user task switches to a different thread) then this breaks
+ * the notion of current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The identifier for this task instance (may be 0)
+ * @param[in] parentid The parent of this task (may be 0)
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid))
+#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin)
+#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn)
+#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d)
+#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end)
+#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped)
+#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin(domain,id,parentid,name)
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn(domain,id,parentid,fn)
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end(domain)
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped(domain,taskid)
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+
+/**
+ * @defgroup markers Markers
+ * Markers represent a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace.
+ */
+typedef enum
+{
+ __itt_scope_unknown = 0, /**< 0; also exposed as __itt_marker_scope_unknown */
+ __itt_scope_global, /**< 1; also exposed as __itt_marker_scope_global */
+ __itt_scope_track_group, /**< 2; also exposed as __itt_marker_scope_process */
+ __itt_scope_track, /**< 3; also exposed as __itt_marker_scope_thread */
+ __itt_scope_task, /**< 4; also exposed as __itt_marker_scope_task */
+ __itt_scope_marker /**< 5; has no alias among the __itt_marker_scope_* macros defined after this enum */
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown __itt_scope_unknown
+#define __itt_marker_scope_global __itt_scope_global
+#define __itt_marker_scope_process __itt_scope_track_group
+#define __itt_marker_scope_thread __itt_scope_track
+#define __itt_marker_scope_task __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#define __itt_marker_ptr ITTNOTIFY_NAME(marker)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker(domain,id,name,scope)
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief describes the type of metadata
+ */
+typedef enum {
+ __itt_metadata_unknown = 0, /**< Value 0; the "unknown" type marker */
+ __itt_metadata_u64, /**< Unsigned 64-bit integer */
+ __itt_metadata_s64, /**< Signed 64-bit integer */
+ __itt_metadata_u32, /**< Unsigned 32-bit integer */
+ __itt_metadata_s32, /**< Signed 32-bit integer */
+ __itt_metadata_u16, /**< Unsigned 16-bit integer */
+ __itt_metadata_s16, /**< Signed 16-bit integer */
+ __itt_metadata_float, /**< Signed 32-bit floating-point */
+ __itt_metadata_double /**< Signed 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for __itt_metadata_add:
+ *  - default build: the call expands to ITTNOTIFY_VOID_D5(metadata_add, ...)
+ *    and __itt_metadata_add_ptr to ITTNOTIFY_NAME(metadata_add);
+ *  - INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro is 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
+#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add(d,x,y,z,a,b)
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+/* On Windows the API is split into a char (A) and a wchar_t (W) entry point;
+ * the unsuffixed name aliases the W variant when UNICODE/_UNICODE is defined
+ * and the A variant otherwise. Other platforms declare only the char form. */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add __itt_metadata_str_addW
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add __itt_metadata_str_addA
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for the string-metadata entry points:
+ *  - default build: calls expand to ITTNOTIFY_VOID_D4(...) and the _ptr
+ *    macros to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA)
+#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
+#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW(d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for __itt_metadata_add_with_scope:
+ *  - default build: the call expands to ITTNOTIFY_VOID_D5(...) and the _ptr
+ *    macro to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro is 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+/* On Windows the API is split into a char (A) and a wchar_t (W) entry point;
+ * the unsuffixed name aliases the W variant when UNICODE/_UNICODE is defined
+ * and the A variant otherwise. Other platforms declare only the char form. */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for the scoped string-metadata entry points:
+ *  - default build: calls expand to ITTNOTIFY_VOID_D4(...) and the _ptr
+ *    macros to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} metadata group */
+
+/**
+ * @defgroup relations Relations
+ * Instances of named entities can be explicitly associated with other
+ * instances using instance IDs and the relationship API calls.
+ *
+ * @{
+ */
+
+/**
+ * @ingroup relations
+ * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
+ * Relations between instances can be added with an API call. The relation
+ * API uses instance IDs. Relations can be added before or after the actual
+ * instances are created and persist independently of the instances. This
+ * is the motivation for having different lifetimes for instance IDs and
+ * the actual instances.
+ */
+typedef enum
+{
+ __itt_relation_is_unknown = 0,
+ __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes (inverse of is_predecessor_to) */
+ __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
+ __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B (inverse of is_child_of) */
+ __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B (inverse of is_continued_by) */
+ __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
+ __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
+ __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
+} __itt_relation;
+
+/**
+ * @ingroup relations
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup relations
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for both relation calls:
+ *  - default build: calls expand to ITTNOTIFY_VOID_D2 / ITTNOTIFY_VOID_D3 and
+ *    the _ptr macros to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
+#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current)
+#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
+#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current(d,x,y)
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add(d,x,y,z)
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} relations group */
+
+/** @cond exclude_from_documentation */
+/* Save the current packing alignment and use 8-byte packing for this struct. */
+#pragma pack(push, 8)
+
+/* Frequency and base timestamp describing one clock domain. */
+typedef struct ___itt_clock_info
+{
+ unsigned long long clock_freq; /*!< Clock domain frequency */
+ unsigned long long clock_base; /*!< Clock domain base timestamp */
+} __itt_clock_info;
+
+/* Restore the packing alignment saved above. */
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+/* Callback type stored in __itt_clock_domain::fn. It receives a pointer to an
+ * __itt_clock_info and an opaque user data pointer; presumably the callback
+ * fills in *clock_info -- confirm against the implementation. */
+typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+/* Save the current packing alignment and use 8-byte packing for this struct. */
+#pragma pack(push, 8)
+
+/* A clock domain: its latest clock info plus the callback (and callback
+ * argument) associated with it. */
+typedef struct ___itt_clock_domain
+{
+ __itt_clock_info info; /*!< Most recent clock domain info */
+ __itt_get_clock_info_fn fn; /*!< Callback function pointer */
+ void* fn_data; /*!< Input argument for the callback function */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_clock_domain* next; /* NOTE(review): presumably links domains into a singly linked list -- confirm */
+} __itt_clock_domain;
+
+/* Restore the packing alignment saved above. */
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Create a clock domain.
+ * Certain applications require the capability to trace their application using
+ * a clock domain different than the CPU, for instance the instrumentation of events
+ * that occur on a GPU.
+ * Because the set of domains is expected to be static over the application's execution time,
+ * there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of which thread created
+ * the domain. This call is thread-safe.
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
+ * @param[in] fn_data Argument for a callback function; may be NULL
+ */
+__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for __itt_clock_domain_create:
+ *  - default build: the name expands to ITTNOTIFY_DATA(clock_domain_create)
+ *    and the _ptr macro to ITTNOTIFY_NAME(clock_domain_create);
+ *  - INTEL_NO_ITTNOTIFY_API: the call evaluates to a null __itt_clock_domain
+ *    pointer, the _ptr macro is 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
+#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create)
+#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Recalculate clock domain frequencies and clock base timestamps.
+ */
+void ITTAPI __itt_clock_domain_reset(void);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for __itt_clock_domain_reset:
+ *  - default build: the name expands to ITTNOTIFY_VOID(clock_domain_reset)
+ *    and the _ptr macro to ITTNOTIFY_NAME(clock_domain_reset);
+ *  - INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro is 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
+#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset)
+#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_reset()
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Create an instance of identifier. This establishes the beginning of the lifetime of
+ * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
+ * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/**
+ * @ingroup clockdomain
+ * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the
+ * given ID value in the trace. Any relationships that are established after this lifetime ends are
+ * invalid. This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for __itt_id_create_ex / __itt_id_destroy_ex:
+ *  - default build: calls expand to ITTNOTIFY_VOID_D3(...) and the _ptr
+ *    macros to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
+#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex)
+#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
+#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance identified by a function pointer rather than a name.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, or __itt_null
+ * @param[in] parentid The parent of this task, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup clockdomain
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ */
+void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
+
+/** @cond exclude_from_documentation */
+/*
+ * Dispatch macros for the three timestamped task calls:
+ *  - default build: calls expand to ITTNOTIFY_VOID_D5 / ITTNOTIFY_VOID_D2 and
+ *    the _ptr macros to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0).
+ * Note: the stub prototypes name the task identifier `id`, while the public
+ * declarations above call it `taskid`.
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
+#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
+#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex)
+#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
+#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex)
+#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
+#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex(domain,clock_domain,timestamp)
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup counters Counters
+ * @ingroup public
+ * Counters are user-defined objects with a monotonically increasing
+ * value. Counter values are 64-bit unsigned integers.
+ * Counters have names that can be displayed in
+ * the tools.
+ * @{
+ */
+
+/**
+ * @brief opaque structure for counter identification
+ */
+/** @cond exclude_from_documentation */
+
+/* Counter handle: a pointer to struct ___itt_counter, whose definition is not
+ * visible in this part of the header. */
+typedef struct ___itt_counter* __itt_counter;
+
+/**
+ * @brief Create an unsigned 64 bits integer counter with given name/domain
+ *
+ * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer
+ *
+ * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64)
+ */
+/* On Windows the API is split into a char (A) and a wchar_t (W) entry point;
+ * the unsuffixed name aliases the W variant when UNICODE/_UNICODE is defined
+ * and the A variant otherwise. Other platforms declare only the char form. */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain);
+__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create __itt_counter_createW
+# define __itt_counter_create_ptr __itt_counter_createW_ptr
+#else /* UNICODE */
+# define __itt_counter_create __itt_counter_createA
+# define __itt_counter_create_ptr __itt_counter_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/*
+ * Dispatch macros for the counter-creation entry points:
+ *  - default build: the names expand to ITTNOTIFY_DATA(...) and the _ptr
+ *    macros to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: the call evaluates to a null __itt_counter so
+ *    that `c = __itt_counter_create(...)` still compiles (same convention as
+ *    __itt_clock_domain_create); the _ptr macros are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA)
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
+#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW)
+#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create ITTNOTIFY_DATA(counter_create)
+#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA(name, domain) (__itt_counter)0
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW(name, domain) (__itt_counter)0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create(name, domain) (__itt_counter)0
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the unsigned 64-bit integer counter value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_inc(__itt_counter id);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(counter_inc).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr macro is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
+#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc)
+#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc(id)
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Increment the unsigned 64-bit integer counter value by @p value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(counter_inc_delta).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr macro is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta)
+#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_delta(id, value)
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_dec(__itt_counter id);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(counter_dec).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr macro is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id))
+#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec)
+#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec(id)
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value by @p value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(counter_dec_delta).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr macro is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr macro is defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta)
+#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_delta(id, value)
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls increment the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+
+/** @cond exclude_from_documentation */
+/* Default build: calls expand to ITTNOTIFY_VOID_D1 / ITTNOTIFY_VOID_D2.
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and the _ptr macros are 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
+#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3)
+#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
+#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_v3(domain,name)
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3(domain,name,delta)
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls decrement the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to decrement the counter
+ */
+void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+
+/** @cond exclude_from_documentation */
+/* Default build: calls expand to ITTNOTIFY_VOID_D1 / ITTNOTIFY_VOID_D2.
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and the _ptr macros are 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr macros are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x)
+#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3)
+#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y)
+#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_v3(domain,name)
+#define __itt_counter_dec_v3_ptr 0
+#define __itt_counter_dec_delta_v3(domain,name,delta)
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_v3_ptr 0
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} counters group */
+
+
+/**
+ * @brief Set the counter value
+ * @param[in] id The counter whose value is set
+ * @param[in] value_ptr Pointer to the new value (presumably typed according to the
+ * counter's metadata type -- TODO confirm against the ITT API reference)
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, __itt_counter_set_value() expands to
+ * nothing and __itt_counter_set_value_ptr to 0. When INTEL_NO_MACRO_BODY is defined,
+ * only __itt_counter_set_value_ptr (0) is provided as a macro.
+ */
+void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr))
+#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value)
+#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value(id, value_ptr)
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the counter value, supplying an explicit clock domain and timestamp
+ * @param[in] id The counter whose value is set
+ * @param[in] clock_domain The clock domain passed along with the timestamp
+ * @param[in] timestamp The user defined timestamp
+ * @param[in] value_ptr Pointer to the new value (presumably typed according to the
+ * counter's metadata type -- TODO confirm against the ITT API reference)
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, __itt_counter_set_value_ex() expands to
+ * nothing and __itt_counter_set_value_ex_ptr to 0. When INTEL_NO_MACRO_BODY is defined,
+ * only __itt_counter_set_value_ex_ptr (0) is provided as a macro.
+ */
+void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr))
+#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex)
+#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create a typed counter with given name/domain
+ *
+ * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter
+ * @param[in] name The counter name
+ * @param[in] domain The counter domain name
+ * @param[in] type The metadata type of the counter value
+ * @return The created counter handle. When INTEL_NO_ITTNOTIFY_API is defined, the call
+ * macros evaluate to a null __itt_counter so that code assigning the result still compiles.
+ * @note On Windows the A (char) and W (wchar_t) variants are declared and the unsuffixed
+ * name maps to the W variant when UNICODE or _UNICODE is defined, otherwise to the A variant.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type);
+__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create_typed __itt_counter_create_typedW
+# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr
+#else /* UNICODE */
+# define __itt_counter_create_typed __itt_counter_create_typedA
+# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type))
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA)
+#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA)
+#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW)
+#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed)
+#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+/* The disabled stubs must still produce a value: the API returns __itt_counter. */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typed_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or
+ * __itt_counter_create_typed()
+ * @param[in] id The counter to destroy
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, __itt_counter_destroy() expands to nothing
+ * and __itt_counter_destroy_ptr to 0. When INTEL_NO_MACRO_BODY is defined, only
+ * __itt_counter_destroy_ptr (0) is provided as a macro.
+ */
+void ITTAPI __itt_counter_destroy(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
+#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy)
+#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_destroy(id)
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance.
+ * @param[in] domain The domain for this marker
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The instance ID for this marker, or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, __itt_marker_ex() expands to nothing and
+ * __itt_marker_ex_ptr to 0. When INTEL_NO_MACRO_BODY is defined, only __itt_marker_ex_ptr (0)
+ * is provided as a macro.
+ */
+void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
+#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, the call macro expands to nothing and
+ * __itt_relation_add_to_current_ex_ptr to 0.
+ */
+void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, the call macro expands to nothing and
+ * __itt_relation_add_ex_ptr to 0.
+ */
+void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
+#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex)
+#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
+#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum ___itt_track_group_type
+{
+ __itt_track_group_type_normal = 0 /* the only track group type declared in this enum */
+} __itt_track_group_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_track_group
+{
+ __itt_string_handle* name; /*!< Name of the track group */
+ struct ___itt_track* track; /*!< List of child tracks */
+ __itt_track_group_type tgtype; /*!< Type of the track group */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track_group* next; /*!< Next track group (presumably a singly linked list -- TODO confirm) */
+} __itt_track_group;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Placeholder for custom track types. Currently, "normal" custom track
+ * is the only available track type.
+ * @note __itt_track_type_queue is additionally declared when
+ * INTEL_ITTNOTIFY_API_PRIVATE is defined.
+ */
+typedef enum ___itt_track_type
+{
+ __itt_track_type_normal = 0
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+ , __itt_track_type_queue
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+} __itt_track_type;
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_track
+{
+ __itt_string_handle* name; /*!< Name of the track */
+ __itt_track_group* group; /*!< Parent group to a track */
+ __itt_track_type ttype; /*!< Type of the track */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track* next; /*!< Next track (presumably the next sibling in the group's track list -- TODO confirm) */
+} __itt_track;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create logical track group.
+ * @param[in] name The name of the track group
+ * @param[in] track_group_type The type of the track group
+ * @return Pointer to the created track group. When INTEL_NO_ITTNOTIFY_API is defined,
+ * the call macro evaluates to a null __itt_track_group pointer.
+ */
+__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
+#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create)
+#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+/* The stub takes the same two arguments as the real API so callers compile in both configurations. */
+#define __itt_track_group_create(name, track_group_type) (__itt_track_group*)0
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create logical track.
+ * @param[in] track_group The group the new track belongs to
+ * @param[in] name The name of the track
+ * @param[in] track_type The type of the track
+ * @return Pointer to the created track. When INTEL_NO_ITTNOTIFY_API is defined, the
+ * call macro evaluates to a null __itt_track pointer.
+ */
+__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
+#define __itt_track_create ITTNOTIFY_DATA(track_create)
+#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_track_create(track_group,name,track_type) (__itt_track*)0
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the logical track.
+ * @param[in] track The track to set
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, __itt_set_track() expands to nothing and
+ * __itt_set_track_ptr to 0. When INTEL_NO_MACRO_BODY is defined, only __itt_set_track_ptr (0)
+ * is provided as a macro.
+ */
+void ITTAPI __itt_set_track(__itt_track* track);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
+#define __itt_set_track ITTNOTIFY_VOID(set_track)
+#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_set_track(track)
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/* ========================================================================== */
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup events Events
+ * @ingroup public
+ * Events group
+ * @{
+ */
+/** @brief User event type: the integer identifier returned by the __itt_event_create() family */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @param[in] name The event name
+ * @param[in] namelen The length of name (presumably in characters -- TODO confirm)
+ * @note name or namelen being null/name and namelen not matching, user event feature not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ * @note On Windows the A (char) and W (wchar_t) variants are declared and the unsuffixed
+ * name maps to the W variant when UNICODE or _UNICODE is defined, otherwise to the A variant.
+ * When INTEL_NO_ITTNOTIFY_API is defined, the call macros evaluate to (__itt_event)0.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_event_create __itt_event_createW
+# define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+# define __itt_event_create __itt_event_createA
+# define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen) (__itt_event)0
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @param[in] event The event identifier, as returned by __itt_event_create()
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, the call macro evaluates to (int)0.
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @param[in] event The event identifier, as returned by __itt_event_create()
+ * @note It is optional if events do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, the call macro evaluates to (int)0.
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines types of arrays data (for C/C++ intrinsic types)
+ * @note __itt_e_first has the same value as __itt_e_char (0) and __itt_e_last the same
+ * value as __itt_e_double; they mark the first and last enumerators, not additional types.
+ */
+typedef enum
+{
+ __itt_e_first = 0,
+ __itt_e_char = 0, /* 1-byte integer */
+ __itt_e_uchar, /* 1-byte unsigned integer */
+ __itt_e_int16, /* 2-byte integer */
+ __itt_e_uint16, /* 2-byte unsigned integer */
+ __itt_e_int32, /* 4-byte integer */
+ __itt_e_uint32, /* 4-byte unsigned integer */
+ __itt_e_int64, /* 8-byte integer */
+ __itt_e_uint64, /* 8-byte unsigned integer */
+ __itt_e_float, /* 4-byte floating */
+ __itt_e_double, /* 8-byte floating */
+ __itt_e_last = __itt_e_double
+} __itt_av_data_type;
+
+/**
+ * @brief Save an array data to a file.
+ * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C).
+ * @note On Windows the A (char) and W (wchar_t) variants are declared and the unsuffixed
+ * name maps to the W variant when UNICODE or _UNICODE is defined, otherwise to the A variant.
+ * When INTEL_NO_ITTNOTIFY_API is defined, the call macros accept the same six arguments
+ * and evaluate to (int)0.
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_av_save __itt_av_saveW
+# define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+# define __itt_av_save __itt_av_saveA
+# define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+/* The stubs mirror the real six-argument, int-returning signature so callers compile unchanged. */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Declares __itt_enable_attach(), which takes no arguments and returns nothing.
+ * NOTE(review): presumably this lets a collector attach to the already running
+ * process -- confirm against the ITT API reference.
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, __itt_enable_attach() expands to nothing
+ * and __itt_enable_attach_ptr to 0. When INTEL_NO_MACRO_BODY is defined, only
+ * __itt_enable_attach_ptr (0) is provided as a macro.
+ */
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+/**
+ * @brief Module load info
+ * This API is used to report necessary information in case of module relocation
+ * @param[in] start_addr - relocated module start address
+ * @param[in] end_addr - relocated module end address
+ * @param[in] path - file system path to the module
+ * @note On Windows the A (char) and W (wchar_t) variants are declared and the unsuffixed
+ * name maps to the W variant when UNICODE or _UNICODE is defined, otherwise to the A variant.
+ * When INTEL_NO_ITTNOTIFY_API is defined, the call macros expand to nothing.
+ * NOTE(review): the stubs below are declared with ITT_STUB although the return type is
+ * void, whereas other void APIs in this header use ITT_STUBV -- confirm this is intentional.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
+void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_module_load __itt_module_loadW
+# define __itt_module_load_ptr __itt_module_loadW_ptr
+#else /* UNICODE */
+# define __itt_module_load __itt_module_loadA
+# define __itt_module_load_ptr __itt_module_loadA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path))
+ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA)
+#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA)
+#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW)
+#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load ITTNOTIFY_VOID(module_load)
+#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA(start_addr, end_addr, path)
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW(start_addr, end_addr, path)
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load(start_addr, end_addr, path)
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_H_ */
+
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+
+#ifndef _ITTNOTIFY_PRIVATE_
+#define _ITTNOTIFY_PRIVATE_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, the call macro expands to nothing and
+ * __itt_task_begin_overlapped_ex_ptr to 0.
+ */
+void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid Explicit ID of finished task
+ * @note When INTEL_NO_ITTNOTIFY_API is defined, the call macro expands to nothing and
+ * __itt_task_end_overlapped_ex_ptr to 0.
+ */
+void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
+#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
+#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex)
+#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
+#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped_ptr 0 /* NOTE(review): no non-_ex __itt_task_end_overlapped API is declared in this block; looks like a leftover -- confirm */
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup makrs_internal Marks
+ * @ingroup internal
+ * Marks group
+ * @warning Internal API:
+ * - It is not shipped to outside of Intel
+ * - It is delivered to internal Intel teams using e-mail or SVN access only
+ * @{
+ */
+/** @brief User mark type: the integer handle returned by the __itt_mark_create() family */
+typedef int __itt_mark_type;
+
+/**
+ * @brief Creates a user mark type with the specified name using char or Unicode string.
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type
+ * @note On Windows the A (char) and W (wchar_t) variants are declared and the unsuffixed
+ * name maps to the W variant when UNICODE or _UNICODE is defined, otherwise to the A variant.
+ * When INTEL_NO_ITTNOTIFY_API is defined, the call macros evaluate to (__itt_mark_type)0.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_create __itt_mark_createW
+# define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+# define __itt_mark_create __itt_mark_createA
+# define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name) (__itt_mark_type)0
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string.
+ *
+ * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous" - function returns after mark is actually added to results.
+ * - This function is useful, for example, to mark different phases of application
+ * (beginning of the next mark automatically means end of current region).
+ * - Can be used together with "continuous" marks (see below) at the same collection session
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of mark
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ * @note On Windows the A (char) and W (wchar_t) variants are declared and the unsuffixed
+ * name maps to the W variant when UNICODE or _UNICODE is defined, otherwise to the A variant.
+ * When INTEL_NO_ITTNOTIFY_API is defined, the call macros evaluate to (int)0.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark __itt_markW
+# define __itt_mark_ptr __itt_markW_ptr
+#else /* UNICODE */
+# define __itt_mark __itt_markA
+# define __itt_mark_ptr __itt_markA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA ITTNOTIFY_DATA(markA)
+#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
+#define __itt_markW ITTNOTIFY_DATA(markW)
+#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark ITTNOTIFY_DATA(mark)
+#define __itt_mark_ptr ITTNOTIFY_NAME(mark)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA(mt, parameter) (int)0
+#define __itt_markA_ptr 0
+#define __itt_markW(mt, parameter) (int)0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark(mt, parameter) (int)0
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA_ptr 0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create a "discrete" user event type (mark) for process
+ * rather than for one thread
+ * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_global __itt_mark_globalW
+# define __itt_mark_global_ptr __itt_mark_globalW_ptr
+#else /* UNICODE */
+# define __itt_mark_global __itt_mark_globalA
+# define __itt_mark_global_ptr __itt_mark_globalA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA)
+#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
+#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW)
+#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global ITTNOTIFY_DATA(mark_global)
+#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA(mt, parameter) (int)0
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW(mt, parameter) (int)0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global(mt, parameter) (int)0
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates an "end" point for "continuous" mark with specified name.
+ *
+ * - Returns zero value in case of success, non-zero value otherwise.
+ * Also returns non-zero value when preceding "begin" point for the
+ * mark with the same name failed to be created or not created.
+ * - The mark of "continuous" type is placed to collection results in
+ * case of success. It appears in overtime view(s) as a special tick
+ * sign (different from "discrete" mark) together with line from
+ * corresponding "begin" mark to "end" mark.
+ * @note Continuous marks can overlap and be nested inside each other.
+ * Discrete mark can be nested inside marked region
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+int ITTAPI __itt_mark_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
+#define __itt_mark_off ITTNOTIFY_DATA(mark_off)
+#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_off(mt) (int)0
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create an "end" point for mark of process
+ * @see int __itt_mark_off(__itt_mark_type mt);
+ */
+int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
+#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off)
+#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_global_off(mt) (int)0
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} marks group */
+
+/**
+ * @defgroup counters_internal Counters
+ * @ingroup internal
+ * Counters group
+ * @{
+ */
+
+
+/**
+ * @defgroup stitch Stack Stitching
+ * @ingroup internal
+ * Stack Stitching group
+ * @{
+ */
+/**
+ * @brief opaque structure for caller (stitch point) identification
+ */
+typedef struct ___itt_caller *__itt_caller;
+
+/**
+ * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to.
+ * The function returns a unique identifier which is used to match the cut points with corresponding stitch points.
+ */
+__itt_caller ITTAPI __itt_stack_caller_create(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
+#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create)
+#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_create() (__itt_caller)0
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ */
+void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
+#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy)
+#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_destroy(id)
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Sets the cut point. Stack from each event which occurs after this call will be cut
+ * at the same stack level the function was called and stitched to the corresponding stitch point.
+ */
+void ITTAPI __itt_stack_callee_enter(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
+#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter)
+#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_enter(id)
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter().
+ */
+void ITTAPI __itt_stack_callee_leave(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
+#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave)
+#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_leave(id)
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} stitch group */
+
+/* ***************************************************************************************************************************** */
+
+#include <stdarg.h>
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_error_code
+{
+ __itt_error_success = 0, /*!< no error */
+ __itt_error_no_module = 1, /*!< module can't be loaded */
+ /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
+ __itt_error_no_symbol = 2, /*!< symbol not found */
+ /* %1$s -- library name, %2$s -- symbol name. */
+ __itt_error_unknown_group = 3, /*!< unknown group specified */
+ /* %1$s -- env var name, %2$s -- group name. */
+ __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
+ /* %1$s -- env var name, %2$d -- system error. */
+ __itt_error_env_too_long = 5, /*!< variable value too long */
+ /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
+ __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */
+ /* %1$s -- function name, %2$d -- errno. */
+} __itt_error_code;
+
+typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
+__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
+
+const char* ITTAPI __itt_api_version(void);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
+void __itt_error_handler(__itt_error_code code, va_list args);
+extern const int ITTNOTIFY_NAME(err);
+#define __itt_err ITTNOTIFY_NAME(err)
+ITT_STUB(ITTAPI, const char*, api_version, (void))
+#define __itt_api_version ITTNOTIFY_DATA(api_version)
+#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_api_version() (const char*)0
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_PRIVATE_ */
+
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+
+// clang-format on
diff --git a/mozglue/baseprofiler/lul/AutoObjectMapper.cpp b/mozglue/baseprofiler/lul/AutoObjectMapper.cpp
new file mode 100644
index 0000000000..0037c943aa
--- /dev/null
+++ b/mozglue/baseprofiler/lul/AutoObjectMapper.cpp
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "BaseProfiler.h"
+#include "PlatformMacros.h"
+#include "AutoObjectMapper.h"
+
+// Helper for AutoObjectMapper*::Map: formats a "Failed to <how> '<file>'"
+// message into a fixed 300-byte buffer and hands it to the |aLog| sink.
+static void failedToMessage(void (*aLog)(const char*), const char* aHowFailed,
+                            const std::string& aFileName) {
+  char buf[300];
+  SprintfLiteral(buf, "AutoObjectMapper::Map: Failed to %s \'%s\'", aHowFailed,
+                 aFileName.c_str());
+  buf[sizeof(buf) - 1] = 0;  // belt and braces: always NUL-terminated
+  aLog(buf);
+}
+
+AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void (*aLog)(const char*))
+ : mImage(nullptr), mSize(0), mLog(aLog), mIsMapped(false) {}
+
+AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() {
+  if (mIsMapped) {
+    MOZ_ASSERT(mSize > 0);
+    // Not strictly guaranteed, but we assume (reasonably enough) that no
+    // mmap facility would ever place a mapping at page zero.
+    MOZ_ASSERT(mImage);
+    munmap(mImage, mSize);
+  } else {
+    // Nothing is mapped, so there is nothing to release; the fields are
+    // expected to still be null/zero.
+    MOZ_ASSERT(!mImage);
+    MOZ_ASSERT(mSize == 0);
+  }
+}
+
+bool AutoObjectMapperPOSIX::Map(/*OUT*/ void** start, /*OUT*/ size_t* length,
+                                std::string fileName) {
+  // Map at most once: the destructor unmaps a single range, so mapping again
+  // would leak the earlier one. Debug builds assert; release builds fail.
+  MOZ_ASSERT(!mIsMapped);
+  if (mIsMapped) {
+    failedToMessage(mLog, "re-map", fileName);
+    return false;
+  }
+  const int fd = open(fileName.c_str(), O_RDONLY);
+  if (fd == -1) {
+    failedToMessage(mLog, "open", fileName);
+    return false;
+  }
+  struct stat st;  // a failed fstat() and an empty file are reported alike
+  const size_t sz = (fstat(fd, &st) == 0) ? st.st_size : 0;
+  if (sz == 0) {
+    failedToMessage(mLog, "fstat", fileName);
+    close(fd);
+    return false;
+  }
+  void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0);
+  close(fd);  // the descriptor is not needed once mmap() has returned
+  if (image == MAP_FAILED) {
+    failedToMessage(mLog, "mmap", fileName);
+    return false;
+  }
+  mIsMapped = true;
+  mImage = *start = image;  // out-params are written only on success
+  mSize = *length = sz;
+  return true;
+}
diff --git a/mozglue/baseprofiler/lul/AutoObjectMapper.h b/mozglue/baseprofiler/lul/AutoObjectMapper.h
new file mode 100644
index 0000000000..f63aa43e0e
--- /dev/null
+++ b/mozglue/baseprofiler/lul/AutoObjectMapper.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AutoObjectMapper_h
+#define AutoObjectMapper_h
+
+#include <string>
+
+#include "mozilla/Attributes.h"
+#include "PlatformMacros.h"
+
+// A (nearly-) RAII class that maps an object in and then unmaps it on
+// destruction. This base class version uses the "normal" POSIX
+// functions: open, fstat, close, mmap, munmap.
+
+class MOZ_STACK_CLASS AutoObjectMapperPOSIX {
+ public:
+ // The constructor does not attempt to map the file, because that
+ // might fail. Instead, once the object has been constructed,
+ // call Map() to attempt the mapping. There is no corresponding
+ // Unmap() since the unmapping is done in the destructor. Failure
+ // messages are sent to |aLog|.
+ explicit AutoObjectMapperPOSIX(void (*aLog)(const char*));
+
+ // Unmap the file on destruction of this object.
+ ~AutoObjectMapperPOSIX();
+
+ // Map |fileName| into the address space and return the mapping
+ // extents. If the file is zero sized this will fail. The file is
+ // mapped read-only (PROT_READ) and shared (MAP_SHARED). Returns true
+ // iff the mapping succeeded, in which case *start and *length hold
+ // its extent. Map must not be called again once it has succeeded;
+ // the implementation asserts that nothing is mapped yet.
+ bool Map(/*OUT*/ void** start, /*OUT*/ size_t* length, std::string fileName);
+
+ protected:
+ // If we are currently holding a mapped object, these record the
+ // mapped address range.
+ void* mImage;
+ size_t mSize;
+
+ // A logging sink, for complaining about mapping failures.
+ void (*mLog)(const char*);
+
+ private:
+ // Are we currently holding a mapped object? This is private to
+ // the base class. Derived classes need to have their own way to
+ // track whether they are holding a mapped object.
+ bool mIsMapped;
+
+ // Disable copying and assignment.
+ AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&);
+ AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&);
+ // Disable heap allocation of this class.
+ void* operator new(size_t);
+ void* operator new[](size_t);
+ void operator delete(void*);
+ void operator delete[](void*);
+};
+
+#endif // AutoObjectMapper_h
diff --git a/mozglue/baseprofiler/lul/LulCommon.cpp b/mozglue/baseprofiler/lul/LulCommon.cpp
new file mode 100644
index 0000000000..f014892a57
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulCommon.cpp
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2011, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/module.cc
+// src/common/unique_string.cc
+
+// There's no internal-only interface for LulCommon. Hence include
+// the external interface directly.
+#include "LulCommonExt.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <map>
+
+#include "BaseProfiler.h"
+
+namespace lul {
+
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+Module::Module(const string& name, const string& os, const string& architecture,
+ const string& id)
+ : name_(name), os_(os), architecture_(architecture), id_(id) {}
+
+Module::~Module() = default;  // members clean up after themselves
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+class UniqueString {
+ public:
+  explicit UniqueString(string str) : str_(strdup(str.c_str())) {}
+  ~UniqueString() { free(const_cast<char*>(str_)); }
+  const char* str_;  // heap copy made by strdup(); released in the destructor
+};
+
+// Accessors for the UniqueString type, which LulCommonExt.h only declares.
+const char* FromUniqueString(const UniqueString* ustr) { return ustr->str_; }
+bool IsEmptyUniqueString(const UniqueString* ustr) {
+  return *FromUniqueString(ustr) == '\0';
+}
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+UniqueStringUniverse::~UniqueStringUniverse() {
+  // Every UniqueString stored in the map was allocated with |new|.
+  for (const auto& entry : map_) {
+    delete entry.second;
+  }
+}
+
+const UniqueString* UniqueStringUniverse::ToUniqueString(string str) {
+  // Returns this universe's UniqueString for |str|, creating it on first use;
+  // the universe keeps ownership. operator[] finds the entry or inserts a null
+  // one, so hits and misses share one lookup (not find() plus operator[]).
+  UniqueString*& slot = map_[str];
+  if (!slot) {
+    slot = new UniqueString(str);
+  }
+  return slot;
+}
+
+} // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulCommonExt.h b/mozglue/baseprofiler/lul/LulCommonExt.h
new file mode 100644
index 0000000000..b20a7321ff
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulCommonExt.h
@@ -0,0 +1,509 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2010, 2012, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// module.h: Define google_breakpad::Module. A Module holds debugging
+// information, and can write that information out as a Breakpad
+// symbol file.
+
+// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
+// Copyright (c) 2001, 2002 Peter Dimov
+//
+// Permission to copy, use, modify, sell and distribute this software
+// is granted provided this copyright notice appears in all copies.
+// This software is provided "as is" without express or implied
+// warranty, and with no claim as to its suitability for any purpose.
+//
+// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation.
+//
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/unique_string.h
+// src/common/scoped_ptr.h
+// src/common/module.h
+
+// External interface for the "Common" component of LUL.
+
+#ifndef LulCommonExt_h
+#define LulCommonExt_h
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <string>
+#include <map>
+#include <vector>
+#include <cstddef> // for std::ptrdiff_t
+
+#include "mozilla/Assertions.h"
+
+namespace lul {
+
+using std::map;
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+
+// Abstract type
+class UniqueString;
+
+// Get the contained C string (debugging only)
+const char* FromUniqueString(const UniqueString*);
+
+// Is the given string empty (that is, "") ?
+bool IsEmptyUniqueString(const UniqueString*);
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+
+// A UniqueStringUniverse is the home of a set of UniqueStrings.
+class UniqueStringUniverse {
+ public:
+  UniqueStringUniverse() = default;
+  ~UniqueStringUniverse();
+  // Returns the UniqueString for |str| that lives in this universe.
+  const UniqueString* ToUniqueString(string str);
+
+ private:
+  map<string, UniqueString*> map_;  // string contents -> its UniqueString
+};
+
+////////////////////////////////////////////////////////////////
+// GUID
+//
+
+typedef struct {
+ uint32_t data1;
+ uint16_t data2;
+ uint16_t data3;
+ uint8_t data4[8];
+} MDGUID; // GUID record: one 32-bit, two 16-bit and eight 8-bit fields
+// Within namespace lul, GUID is simply another name for MDGUID.
+typedef MDGUID GUID;
+
+////////////////////////////////////////////////////////////////
+// scoped_ptr
+//
+
+// scoped_ptr mimics a built-in pointer except that it guarantees deletion
+// of the object pointed to, either on destruction of the scoped_ptr or via
+// an explicit reset(). scoped_ptr is a simple solution for simple needs;
+// use shared_ptr or std::auto_ptr if your needs are more complex.
+
+// *** NOTE ***
+// If your scoped_ptr is a class member of class FOO pointing to a
+// forward declared type BAR (as shown below), then you MUST use a non-inlined
+// version of the destructor. The destructor of a scoped_ptr (called from
+// FOO's destructor) must have a complete definition of BAR in order to
+// destroy it. Example:
+//
+// -- foo.h --
+// class BAR;
+//
+// class FOO {
+// public:
+// FOO();
+// ~FOO(); // Required for sources that instantiate class FOO to compile!
+//
+// private:
+// scoped_ptr<BAR> bar_;
+// };
+//
+// -- foo.cc --
+// #include "foo.h"
+// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition.
+
+// scoped_ptr_malloc added by Google
+// When one of these goes out of scope, instead of doing a delete or
+// delete[], it calls free(). scoped_ptr_malloc<char> is likely to see
+// much more use than any other specializations.
+
+// release() added by Google
+// Use this to conditionally transfer ownership of a heap-allocated object
+// to the caller, usually on method success.
+
+template <typename T>
+class scoped_ptr {
+ private:
+  T* ptr;  // Owned object (released with |delete|), or null.
+  // Private copy operations: a scoped_ptr cannot be copied or assigned.
+  scoped_ptr(scoped_ptr const&);
+  scoped_ptr& operator=(scoped_ptr const&);
+
+ public:
+  typedef T element_type;
+  // Takes ownership of |p|, which may be null.
+  explicit scoped_ptr(T* p = nullptr) : ptr(p) {}
+
+  ~scoped_ptr() { delete ptr; }
+  // Replaces the owned object with |p|, deleting the previous one.
+  void reset(T* p = nullptr) {
+    if (ptr == p) return;  // resetting to the same pointer is a no-op
+
+    delete ptr;
+    ptr = p;
+  }
+
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return !(ptr == p); }
+  // Observes the pointer without giving up ownership.
+  T* get() const { return ptr; }
+  // Exchanges the owned objects of *this and |b|.
+  void swap(scoped_ptr& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+  // Gives up ownership: returns the pointer and leaves *this null.
+  T* release() {
+    T* const released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+  // Comparing two scoped_ptrs is disabled: each should own its own object.
+  template <typename U>
+  bool operator==(scoped_ptr<U> const& p) const;
+  template <typename U>
+  bool operator!=(scoped_ptr<U> const& p) const;
+};
+
+// Non-member swap: exchanges the objects owned by two scoped_ptrs.
+template <typename T>
+inline void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) { b.swap(a); }
+
+// Comparisons with the raw pointer on the left-hand side.
+template <typename T>
+inline bool operator==(T* p, const scoped_ptr<T>& b) {
+  return b.get() == p;
+}
+
+template <typename T>
+inline bool operator!=(T* p, const scoped_ptr<T>& b) {
+  return !(b.get() == p);
+}
+
+// scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to
+// is guaranteed, either on destruction of the scoped_array or via an explicit
+// reset(). Use shared_array or std::vector if your needs are more complex.
+
+template <typename T>
+class scoped_array {
+ private:
+  T* ptr;  // Owned array (released with |delete[]|), or null.
+  // Private copy operations: a scoped_array cannot be copied or assigned.
+  scoped_array(scoped_array const&);
+  scoped_array& operator=(scoped_array const&);
+
+ public:
+  typedef T element_type;
+  // Takes ownership of the array |p|, which may be null.
+  explicit scoped_array(T* p = nullptr) : ptr(p) {}
+
+  ~scoped_array() { delete[] ptr; }
+  // Replaces the owned array with |p|, releasing the previous one.
+  void reset(T* p = nullptr) {
+    if (ptr == p) return;  // resetting to the same pointer is a no-op
+
+    delete[] ptr;
+    ptr = p;
+  }
+  // Element access; asserts a non-null array and a non-negative index.
+  T& operator[](std::ptrdiff_t i) const {
+    MOZ_ASSERT(ptr != 0);
+    MOZ_ASSERT(i >= 0);
+    return *(ptr + i);
+  }
+
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return !(ptr == p); }
+  // Observes the pointer without giving up ownership.
+  T* get() const { return ptr; }
+  // Exchanges the owned arrays of *this and |b|.
+  void swap(scoped_array& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+  // Gives up ownership: returns the pointer and leaves *this null.
+  T* release() {
+    T* const released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+  // Comparing two scoped_arrays is disabled: each should own its own array.
+  template <typename U>
+  bool operator==(scoped_array<U> const& p) const;
+  template <typename U>
+  bool operator!=(scoped_array<U> const& p) const;
+};
+
+// Non-member swap: exchanges the arrays owned by two scoped_arrays.
+template <class T>
+inline void swap(scoped_array<T>& a, scoped_array<T>& b) { b.swap(a); }
+
+// Comparisons with the raw pointer on the left-hand side.
+template <typename T>
+inline bool operator==(T* p, const scoped_array<T>& b) {
+  return b.get() == p;
+}
+
+template <typename T>
+inline bool operator!=(T* p, const scoped_array<T>& b) {
+  return !(b.get() == p);
+}
+
+// Function object whose call operator passes its argument to free().
+// It is the default FreeProc template argument of scoped_ptr_malloc below.
+class ScopedPtrMallocFree {
+ public:
+  void operator()(void* x) const { free(x); }
+};
+
+// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a
+// second template argument, the functor used to free the object.
+
+template <typename T, typename FreeProc = ScopedPtrMallocFree>
+class scoped_ptr_malloc {
+ private:
+  T* ptr;  // Owned allocation (released through FreeProc), or null.
+  // Private copy operations: a scoped_ptr_malloc cannot be copied/assigned.
+  scoped_ptr_malloc(scoped_ptr_malloc const&);
+  scoped_ptr_malloc& operator=(scoped_ptr_malloc const&);
+
+ public:
+  typedef T element_type;
+  // Takes ownership of |p|, which may be null.
+  explicit scoped_ptr_malloc(T* p = nullptr) : ptr(p) {}
+
+  ~scoped_ptr_malloc() { free_((void*)ptr); }
+  // Replaces the owned allocation with |p|, freeing the previous one.
+  void reset(T* p = nullptr) {
+    if (ptr == p) return;  // resetting to the same pointer is a no-op
+
+    free_((void*)ptr);
+    ptr = p;
+  }
+
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return !(ptr == p); }
+  // Observes the pointer without giving up ownership.
+  T* get() const { return ptr; }
+  // Exchanges the owned allocations of *this and |b|.
+  void swap(scoped_ptr_malloc& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+  // Gives up ownership: returns the pointer and leaves *this null.
+  T* release() {
+    T* const released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+  // Comparing two scoped_ptr_mallocs is disabled: each owns its own object.
+  template <typename U, typename GP>
+  bool operator==(scoped_ptr_malloc<U, GP> const& p) const;
+  template <typename U, typename GP>
+  bool operator!=(scoped_ptr_malloc<U, GP> const& p) const;
+  // The one FreeProc instance shared by all objects of this specialization.
+  static FreeProc const free_;
+};
+
+template <typename T, typename FP>
+FP const scoped_ptr_malloc<T, FP>::free_ = FP();
+
+// Non-member swap: exchanges the allocations owned by two scoped_ptr_mallocs.
+template <typename T, typename FP>
+inline void swap(scoped_ptr_malloc<T, FP>& a, scoped_ptr_malloc<T, FP>& b) {
+  b.swap(a);
+}
+// Comparisons with the raw pointer on the left-hand side.
+template <typename T, typename FP>
+inline bool operator==(T* p, const scoped_ptr_malloc<T, FP>& b) {
+  return b.get() == p;
+}
+template <typename T, typename FP>
+inline bool operator!=(T* p, const scoped_ptr_malloc<T, FP>& b) {
+  return !(b.get() == p);
+}
+
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+
+// A Module represents the contents of a module. This cut-down class, derived
+// from Breakpad's Module, keeps only the header strings (name, OS,
+// architecture, id) and defines the Expr / RuleMap / StackFrameEntry types
+// used to describe 'STACK CFI' register-recovery rules.
+class Module {
+ public:
+  // The type of addresses and sizes in a symbol table.
+  typedef uint64_t Address;
+
+  // Representation of an expression.  This can either be a postfix
+  // expression, in which case it is stored as a string, or a simple
+  // expression of the form (identifier + imm) or *(identifier + imm).
+  // It can also be invalid (denoting "no value").
+  enum ExprHow { kExprInvalid = 1, kExprPostfix, kExprSimple, kExprSimpleMem };
+
+  struct Expr {
+    // Construct a simple-form expression. An empty |ident| instead yields the
+    // same state as the default (invalid) Expr. Note that writing |Expr();|
+    // in the body would merely create and discard a temporary, leaving this
+    // object's ident_/offset_/how_ uninitialized, so set the fields directly.
+    Expr(const UniqueString* ident, long offset, bool deref) {
+      const bool valid = !IsEmptyUniqueString(ident);
+      postfix_ = "";
+      ident_ = valid ? ident : nullptr;
+      offset_ = valid ? offset : 0;
+      how_ = !valid ? kExprInvalid : (deref ? kExprSimpleMem : kExprSimple);
+    }
+
+    // Construct an invalid expression
+    Expr() {
+      postfix_ = "";
+      ident_ = nullptr;
+      offset_ = 0;
+      how_ = kExprInvalid;
+    }
+
+    // Return the postfix expression string, either directly,
+    // if this is a postfix expression, or by synthesising it
+    // for a simple expression ("<ident> <|offset|> <+|-> [^]").
+    std::string getExprPostfix() const {
+      switch (how_) {
+        case kExprPostfix:
+          return postfix_;
+        case kExprSimple:
+        case kExprSimpleMem: {
+          char buf[40];  // bounded write; ample for a long plus the operators
+          snprintf(buf, sizeof(buf), " %ld %c%s", labs(offset_),
+                   offset_ < 0 ? '-' : '+', how_ == kExprSimple ? "" : " ^");
+          return std::string(FromUniqueString(ident_)) + std::string(buf);
+        }
+        case kExprInvalid:
+        default:
+          MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type");
+          return "Expr::genExprPostfix: kExprInvalid";
+      }
+    }
+
+    // The identifier that gives the starting value for simple expressions.
+    const UniqueString* ident_;
+    // The offset to add for simple expressions.
+    long offset_;
+    // The Postfix expression string to evaluate for non-simple expressions.
+    std::string postfix_;
+    // The operation expressed by this expression.
+    ExprHow how_;
+  };
+
+  // A map from register names to expressions that recover
+  // their values. This can represent a complete set of rules to
+  // follow at some address, or a set of changes to be applied to an
+  // extant set of rules.
+  // NOTE! there are two completely different types called RuleMap.  This
+  // is one of them.
+  typedef std::map<const UniqueString*, Expr> RuleMap;
+
+  // A map from addresses to RuleMaps, representing changes that take
+  // effect at given addresses.
+  typedef std::map<Address, RuleMap> RuleChangeMap;
+
+  // A range of 'STACK CFI' stack walking information. An instance of
+  // this structure corresponds to a 'STACK CFI INIT' record and the
+  // subsequent 'STACK CFI' records that fall within its range.
+  struct StackFrameEntry {
+    // The starting address and number of bytes of machine code this
+    // entry covers.
+    Address address, size;
+
+    // The initial register recovery rules, in force at the starting
+    // address.
+    RuleMap initial_rules;
+
+    // A map from addresses to rule changes.  To find the rules in
+    // force at a given address, start with initial_rules, and then
+    // apply the changes given in this map for all addresses up to and
+    // including the address you're interested in.
+    RuleChangeMap rule_changes;
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const std::string& name, const std::string& os,
+         const std::string& architecture, const std::string& id);
+  ~Module();
+
+ private:
+  // Module header entries.
+  std::string name_, os_, architecture_, id_;
+};
+
+} // namespace lul
+
+#endif // LulCommonExt_h
diff --git a/mozglue/baseprofiler/lul/LulDwarf.cpp b/mozglue/baseprofiler/lul/LulDwarf.cpp
new file mode 100644
index 0000000000..c83296fc62
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarf.cpp
@@ -0,0 +1,2252 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
+// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/dwarf/bytereader.cc
+// src/common/dwarf/dwarf2reader.cc
+// src/common/dwarf_cfi_to_module.cc
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <map>
+#include <stack>
+#include <string>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulCommonExt.h"
+#include "LulDwarfInt.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_DWARF 0
+
+namespace lul {
+
+using std::string;
+
+// Construct a reader for data of the given endianness.  No offset or
+// address reader is selected yet and both sizes start at zero; every
+// have_*_base_ flag is value-initialized.
+ByteReader::ByteReader(enum Endianness endian)
+    : offset_reader_(nullptr),
+      address_reader_(nullptr),
+      endian_(endian),
+      address_size_(0),
+      offset_size_(0),
+      have_section_base_(),
+      have_text_base_(),
+      have_data_base_(),
+      have_function_base_() {}
+
+// Nothing to release explicitly here.
+ByteReader::~ByteReader() = default;
+
+// Record SIZE (expected to be 4 or 8) as the offset size and point
+// offset_reader_ at the fixed-width reader of that width.
+void ByteReader::SetOffsetSize(uint8 size) {
+  offset_size_ = size;
+  MOZ_ASSERT(size == 4 || size == 8);
+  // Any size other than 4 selects the eight-byte reader.
+  this->offset_reader_ = (size == 4) ? &ByteReader::ReadFourBytes
+                                     : &ByteReader::ReadEightBytes;
+}
+
+// Record SIZE (expected to be 4 or 8) as the address size and point
+// address_reader_ at the fixed-width reader of that width.
+void ByteReader::SetAddressSize(uint8 size) {
+  address_size_ = size;
+  MOZ_ASSERT(size == 4 || size == 8);
+  // Any size other than 4 selects the eight-byte reader.
+  this->address_reader_ = (size == 4) ? &ByteReader::ReadFourBytes
+                                      : &ByteReader::ReadEightBytes;
+}
+
+// Read a DWARF initial-length field beginning at START.  Store in *LEN
+// the number of bytes the field occupies (4 or 12), set the offset size
+// to match (4 or 8), and return the length value.
+uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) {
+  const uint64 first_word = ReadFourBytes(start);
+
+  // The common case: a plain four-byte length.
+  if (first_word != 0xffffffff) {
+    SetOffsetSize(4);
+    *len = 4;
+    return first_word;
+  }
+
+  // In DWARF2/3, an all-ones first word means the offset size is 8 and
+  // the real length is held in the 8 bytes that follow.
+  SetOffsetSize(8);
+  *len = 12;
+  return ReadOffset(start + 4);
+}
+
+// Return true if ENCODING is a pointer encoding this reader recognizes.
+bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
+  // The two special-case encodings are accepted outright.
+  if (encoding == DW_EH_PE_omit || encoding == DW_EH_PE_aligned) return true;
+
+  // Otherwise both the value-format bits and the base-selection bits
+  // must lie within the ranges we know about.
+  const bool format_in_range = (encoding & 0x7) <= DW_EH_PE_udata8;
+  const bool base_in_range = (encoding & 0x70) <= DW_EH_PE_funcrel;
+  return format_in_range && base_in_range;
+}
+
+// Return true if the base address that ENCODING refers to has been
+// supplied to this reader, so that pointers using ENCODING can actually
+// be resolved.
+bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
+  const int base_kind = encoding & 0x70;
+  // Absolute pointers need no base at all.
+  if (base_kind == DW_EH_PE_absptr) return true;
+  if (base_kind == DW_EH_PE_pcrel) return have_section_base_;
+  if (base_kind == DW_EH_PE_textrel) return have_text_base_;
+  if (base_kind == DW_EH_PE_datarel) return have_data_base_;
+  if (base_kind == DW_EH_PE_funcrel) return have_function_base_;
+  // Anything else is a base selector we do not understand.
+  return false;
+}
+
+// Decode the pointer stored at BUFFER according to ENCODING, store the
+// number of bytes consumed in *LEN, and return the pointer's value.
+// ENCODING must not be DW_EH_PE_omit.  Unknown value formats or base
+// selectors abort the process.
+uint64 ByteReader::ReadEncodedPointer(const char* buffer,
+                                      DwarfPointerEncoding encoding,
+                                      size_t* len) const {
+  // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
+  // see it here.
+  MOZ_ASSERT(encoding != DW_EH_PE_omit);
+
+  // The Linux Standards Base 4.0 does not make this clear, but the
+  // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
+  // agree that aligned pointers are always absolute, machine-sized,
+  // machine-signed pointers.
+  if (encoding == DW_EH_PE_aligned) {
+    MOZ_ASSERT(have_section_base_);
+
+    // Alignment is judged in the program's address space, not ours: we
+    // want the next position in our buffer that would be aligned once
+    // the .eh_frame section the buffer contains is loaded with
+    // buffer_base_ at address section_base_.  section_base_ itself may
+    // or may not be aligned.
+
+    // Distance of section_base_ past the closest prior aligned address.
+    const uint64 skew = section_base_ & (AddressSize() - 1);
+    // Distance of BUFFER past that same aligned address.
+    const uint64 unaligned = skew + (buffer - buffer_base_);
+    // Round up to the next boundary.
+    const uint64 aligned = (unaligned + AddressSize() - 1) & -AddressSize();
+    // Convert back to a pointer into our buffer.
+    const char* aligned_buffer = buffer_base_ + (aligned - skew);
+    // The length covers the padding we skipped plus the pointer itself.
+    *len = aligned_buffer - buffer + AddressSize();
+    return ReadAddress(aligned_buffer);
+  }
+
+  // First extract the raw value, ignoring whether it is a pointer or an
+  // offset relative to some base.
+  uint64 raw;
+  switch (encoding & 0x0f) {
+    case DW_EH_PE_absptr:
+      // DW_EH_PE_absptr is meaningful in both nybbles of an encoding
+      // byte.  In the high nybble it says the pointer is absolute rather
+      // than relative to a base.  In the low nybble, as here, it says
+      // the value is stored as a normal machine-sized, machine-signed
+      // address.  A low nybble of DW_EH_PE_absptr does not imply the
+      // pointer is absolute; it is still correct to add a base if the
+      // high nybble asks for one.
+      raw = ReadAddress(buffer);
+      *len = AddressSize();
+      break;
+
+    case DW_EH_PE_uleb128:
+      raw = ReadUnsignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_udata2:
+      raw = ReadTwoBytes(buffer);
+      *len = 2;
+      break;
+
+    case DW_EH_PE_udata4:
+      raw = ReadFourBytes(buffer);
+      *len = 4;
+      break;
+
+    case DW_EH_PE_udata8:
+      raw = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    case DW_EH_PE_sleb128:
+      raw = ReadSignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_sdata2:
+      raw = ReadTwoBytes(buffer);
+      // Sign-extend from 16 bits.
+      raw = (raw ^ 0x8000) - 0x8000;
+      *len = 2;
+      break;
+
+    case DW_EH_PE_sdata4:
+      raw = ReadFourBytes(buffer);
+      // Sign-extend from 32 bits.
+      raw = (raw ^ 0x80000000ULL) - 0x80000000ULL;
+      *len = 4;
+      break;
+
+    case DW_EH_PE_sdata8:
+      // Already the full width of our type; nothing to sign-extend.
+      raw = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    default:
+      abort();
+  }
+
+  // Next pick the base address the raw value is relative to.
+  uint64 base;
+  switch (encoding & 0x70) {
+    case DW_EH_PE_absptr:
+      base = 0;
+      break;
+
+    case DW_EH_PE_pcrel:
+      // Relative to where the encoded pointer itself would be loaded.
+      MOZ_ASSERT(have_section_base_);
+      base = section_base_ + (buffer - buffer_base_);
+      break;
+
+    case DW_EH_PE_textrel:
+      MOZ_ASSERT(have_text_base_);
+      base = text_base_;
+      break;
+
+    case DW_EH_PE_datarel:
+      MOZ_ASSERT(have_data_base_);
+      base = data_base_;
+      break;
+
+    case DW_EH_PE_funcrel:
+      MOZ_ASSERT(have_function_base_);
+      base = function_base_;
+      break;
+
+    default:
+      abort();
+  }
+
+  const uint64 pointer = base + raw;
+
+  // Remove inappropriate upper bits for four-byte addresses.
+  if (AddressSize() == 4) return pointer & 0xffffffff;
+
+  MOZ_ASSERT(AddressSize() == sizeof(uint64));
+  return pointer;
+}
+
+// A DWARF rule for recovering the address or value of a register, or
+// computing the canonical frame address. There is one subclass of this for
+// each '*Rule' member function in CallFrameInfo::Handler.
+//
+// It's annoying that we have to handle Rules using pointers (because
+// the concrete instances can have an arbitrary size). They're small,
+// so it would be much nicer if we could just handle them by value
+// instead of fretting about ownership and destruction.
+//
+// It seems like all these could simply be instances of std::tr1::bind,
+// except that we need instances to be EqualityComparable, too.
+//
+// This could logically be nested within State, but then the qualified names
+// get horrendous.
+class CallFrameInfo::Rule {
+ public:
+  virtual ~Rule() {}
+
+  // Report to HANDLER that, at ADDRESS in the program, REG can be
+  // recovered using this rule.  When REG is kCFARegister the rule
+  // instead describes how to compute the canonical frame address.
+  // Returns whatever the HANDLER member function returned.
+  virtual bool Handle(Handler* handler, uint64 address, int reg) const = 0;
+
+  // Rule equality.  Used to work out which rules must be reported after
+  // a DW_CFA_restore_state instruction.
+  virtual bool operator==(const Rule& rhs) const = 0;
+
+  // Inequality is simply the negation of operator==.
+  bool operator!=(const Rule& rhs) const { return !(*this == rhs); }
+
+  // Return a pointer to a copy of this rule.
+  virtual Rule* Copy() const = 0;
+
+  // For a base+offset rule, switch the base register to REG; for every
+  // other kind of rule this default does nothing.  (Ugly, but required
+  // for DW_CFA_def_cfa_register.)
+  virtual void SetBaseRegister(unsigned reg) {}
+
+  // For a base+offset rule, replace the offset with OFFSET; for every
+  // other kind of rule this default does nothing.  (Ugly, but required
+  // for DW_CFA_def_cfa_offset.)
+  virtual void SetOffset(long long offset) {}
+
+  // A stand-in for RTTI: one tag per concrete subclass, so that the
+  // subclasses can implement equality comparisons.
+  enum CFIRTag {
+    CFIR_UNDEFINED_RULE,
+    CFIR_SAME_VALUE_RULE,
+    CFIR_OFFSET_RULE,
+    CFIR_VAL_OFFSET_RULE,
+    CFIR_REGISTER_RULE,
+    CFIR_EXPRESSION_RULE,
+    CFIR_VAL_EXPRESSION_RULE
+  };
+
+  // Return the tag identifying the concrete subclass of this object.
+  virtual CFIRTag getTag() const = 0;
+};
+
+// Rule: the value the register had in the caller cannot be recovered.
+class CallFrameInfo::UndefinedRule : public CallFrameInfo::Rule {
+ public:
+  UndefinedRule() {}
+  ~UndefinedRule() {}
+
+  CFIRTag getTag() const override { return CFIR_UNDEFINED_RULE; }
+
+  // Forward to the handler's UndefinedRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->UndefinedRule(address, reg);
+  }
+
+  // This rule carries no data, so any other UndefinedRule is equal.
+  bool operator==(const Rule& rhs) const override {
+    return rhs.getTag() == CFIR_UNDEFINED_RULE;
+  }
+
+  Rule* Copy() const override { return new UndefinedRule(*this); }
+};
+
+// Rule: the register's value is the same as that it had in the caller.
+class CallFrameInfo::SameValueRule : public CallFrameInfo::Rule {
+ public:
+  SameValueRule() {}
+  ~SameValueRule() {}
+
+  CFIRTag getTag() const override { return CFIR_SAME_VALUE_RULE; }
+
+  // Forward to the handler's SameValueRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->SameValueRule(address, reg);
+  }
+
+  // This rule carries no data, so any other SameValueRule is equal.
+  bool operator==(const Rule& rhs) const override {
+    return rhs.getTag() == CFIR_SAME_VALUE_RULE;
+  }
+
+  Rule* Copy() const override { return new SameValueRule(*this); }
+};
+
+// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
+// may be CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::OffsetRule : public CallFrameInfo::Rule {
+ public:
+  OffsetRule(int base_register, long offset)
+      : base_register_(base_register), offset_(offset) {}
+  ~OffsetRule() {}
+
+  CFIRTag getTag() const override { return CFIR_OFFSET_RULE; }
+
+  // Forward to the handler's OffsetRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->OffsetRule(address, reg, base_register_, offset_);
+  }
+
+  // Equal only to another OffsetRule with the same base and offset.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_OFFSET_RULE) return false;
+    // The tag check above makes this downcast safe.
+    const OffsetRule& other = static_cast<const OffsetRule&>(rhs);
+    return base_register_ == other.base_register_ && offset_ == other.offset_;
+  }
+
+  Rule* Copy() const override { return new OffsetRule(*this); }
+
+  // SetBaseRegister and SetOffset are deliberately not overridden: they
+  // are only ever applied to CFA rules (for DW_CFA_def_cfa_offset), and
+  // an OffsetRule makes no sense as a CFA rule, because it yields the
+  // address at which a register is saved rather than a value.
+
+ private:
+  int base_register_;
+  long offset_;
+};
+
+// Rule: the value the register had in the caller is the value of
+// BASE_REGISTER plus offset. BASE_REGISTER may be
+// CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::ValOffsetRule : public CallFrameInfo::Rule {
+ public:
+  ValOffsetRule(int base_register, long offset)
+      : base_register_(base_register), offset_(offset) {}
+  ~ValOffsetRule() {}
+
+  CFIRTag getTag() const override { return CFIR_VAL_OFFSET_RULE; }
+
+  // Forward to the handler's ValOffsetRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->ValOffsetRule(address, reg, base_register_, offset_);
+  }
+
+  // Equal only to another ValOffsetRule with the same base and offset.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_VAL_OFFSET_RULE) return false;
+    // The tag check above makes this downcast safe.
+    const ValOffsetRule& other = static_cast<const ValOffsetRule&>(rhs);
+    return base_register_ == other.base_register_ && offset_ == other.offset_;
+  }
+
+  Rule* Copy() const override { return new ValOffsetRule(*this); }
+
+  // This is the base+offset rule kind, so both setters take effect.
+  void SetBaseRegister(unsigned reg) override { base_register_ = reg; }
+  void SetOffset(long long offset) override { offset_ = offset; }
+
+ private:
+  int base_register_;
+  long offset_;
+};
+
+// Rule: the register has been saved in another register REGISTER_NUMBER_.
+class CallFrameInfo::RegisterRule : public CallFrameInfo::Rule {
+ public:
+  explicit RegisterRule(int register_number)
+      : register_number_(register_number) {}
+  ~RegisterRule() {}
+
+  CFIRTag getTag() const override { return CFIR_REGISTER_RULE; }
+
+  // Forward to the handler's RegisterRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->RegisterRule(address, reg, register_number_);
+  }
+
+  // Equal only to another RegisterRule naming the same register.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_REGISTER_RULE) return false;
+    // The tag check above makes this downcast safe.
+    const RegisterRule& other = static_cast<const RegisterRule&>(rhs);
+    return register_number_ == other.register_number_;
+  }
+
+  Rule* Copy() const override { return new RegisterRule(*this); }
+
+ private:
+  int register_number_;
+};
+
+// Rule: EXPRESSION evaluates to the address at which the register is saved.
+class CallFrameInfo::ExpressionRule : public CallFrameInfo::Rule {
+ public:
+  explicit ExpressionRule(const string& expression) : expression_(expression) {}
+  ~ExpressionRule() {}
+
+  CFIRTag getTag() const override { return CFIR_EXPRESSION_RULE; }
+
+  // Forward to the handler's ExpressionRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->ExpressionRule(address, reg, expression_);
+  }
+
+  // Equal only to another ExpressionRule holding the same expression.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_EXPRESSION_RULE) return false;
+    // The tag check above makes this downcast safe.
+    const ExpressionRule& other = static_cast<const ExpressionRule&>(rhs);
+    return expression_ == other.expression_;
+  }
+
+  Rule* Copy() const override { return new ExpressionRule(*this); }
+
+ private:
+  string expression_;
+};
+
+// Rule: EXPRESSION evaluates to the previous value of the register.
+class CallFrameInfo::ValExpressionRule : public CallFrameInfo::Rule {
+ public:
+  explicit ValExpressionRule(const string& expression)
+      : expression_(expression) {}
+  ~ValExpressionRule() {}
+
+  CFIRTag getTag() const override { return CFIR_VAL_EXPRESSION_RULE; }
+
+  // Forward to the handler's ValExpressionRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->ValExpressionRule(address, reg, expression_);
+  }
+
+  // Equal only to another ValExpressionRule holding the same expression.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_VAL_EXPRESSION_RULE) return false;
+    // The tag check above makes this downcast safe.
+    const ValExpressionRule& other =
+        static_cast<const ValExpressionRule&>(rhs);
+    return expression_ == other.expression_;
+  }
+
+  Rule* Copy() const override { return new ValExpressionRule(*this); }
+
+ private:
+  string expression_;
+};
+
+// A map from register numbers to rules.
+class CallFrameInfo::RuleMap {
+ public:
+  // Start out with no CFA rule and no register rules.
+  RuleMap() : cfa_rule_(nullptr) {}
+  // Copying goes through operator=, which copies the rules themselves.
+  RuleMap(const RuleMap& rhs) : cfa_rule_(nullptr) { *this = rhs; }
+  ~RuleMap() { Clear(); }
+
+  RuleMap& operator=(const RuleMap& rhs);
+
+  // Install RULE as the rule for computing the CFA, deleting whatever
+  // CFA rule was there before.  This RuleMap takes ownership of RULE.
+  void SetCFARule(Rule* rule) {
+    delete cfa_rule_;
+    cfa_rule_ = rule;
+  }
+
+  // Return the current CFA rule.  Unlike RegisterRule, this RuleMap
+  // retains ownership of the rule.  We use this for
+  // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_register, and for detecting
+  // references to the CFA before a rule for it has been established.
+  Rule* CFARule() const { return cfa_rule_; }
+
+  // Return the rule for REG, or NULL if there is none.  The caller takes
+  // ownership of the result.
+  Rule* RegisterRule(int reg) const;
+
+  // Install RULE as the rule for computing REG.  This RuleMap takes
+  // ownership of RULE.
+  void SetRegisterRule(int reg, Rule* rule);
+
+  // Make all the appropriate calls to HANDLER as if we were changing
+  // from this RuleMap to NEW_RULES at ADDRESS.  We use this to implement
+  // DW_CFA_restore_state, where lots of rules can change simultaneously.
+  // Return true if all handlers returned true; otherwise, return false.
+  bool HandleTransitionTo(Handler* handler, uint64 address,
+                          const RuleMap& new_rules) const;
+
+ private:
+  // A map from register numbers to Rules.
+  typedef std::map<int, Rule*> RuleByNumber;
+
+  // Delete every register rule and the CFA rule, leaving the map empty.
+  void Clear();
+
+  // The rule for computing the canonical frame address, or null if none
+  // has been set.  This RuleMap owns this rule.
+  Rule* cfa_rule_;
+
+  // The rule for recovering each register, keyed by register number.
+  // This RuleMap owns the Rules the map refers to.
+  RuleByNumber registers_;
+};
+
+// Replace the contents of this RuleMap with copies of the rules in RHS.
+// Each map owns the rules it refers to, so the rules themselves are
+// copied rather than shared.  Returns *this.
+CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
+  // Self-assignment must be a no-op: Clear() below would otherwise
+  // delete the very rules we are about to copy and leave the map empty.
+  if (this == &rhs) return *this;
+
+  Clear();
+  // Since each map owns the rules it refers to, assignment must copy them.
+  if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
+  for (RuleByNumber::const_iterator it = rhs.registers_.begin();
+       it != rhs.registers_.end(); ++it)
+    registers_[it->first] = it->second->Copy();
+  return *this;
+}
+
+// Return a freshly allocated copy of the rule recorded for REG, or null
+// if no rule is recorded.  REG must not be the CFA pseudo-register.
+CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  const RuleByNumber::const_iterator found = registers_.find(reg);
+  if (found == registers_.end()) return nullptr;
+  // Hand back a copy; the map keeps its own instance.
+  return found->second->Copy();
+}
+
+// Record RULE as the rule for REG, deleting any rule previously recorded
+// for it.  REG must not be the CFA pseudo-register and RULE must not be
+// null.
+void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  MOZ_ASSERT(rule);
+  // operator[] creates a null entry when REG is new, so the delete below
+  // is harmless in that case.
+  Rule*& entry = registers_[reg];
+  delete entry;
+  entry = rule;
+}
+
+// Report to HANDLER, as changes taking effect at ADDRESS, the
+// differences between this RuleMap and NEW_RULES.  Return false as soon
+// as any handler call returns false; otherwise return true.
+bool CallFrameInfo::RuleMap::HandleTransitionTo(
+    Handler* handler, uint64 address, const RuleMap& new_rules) const {
+  // Transition from cfa_rule_ to new_rules.cfa_rule_.
+  if (cfa_rule_ && new_rules.cfa_rule_) {
+    if (*cfa_rule_ != *new_rules.cfa_rule_ &&
+        !new_rules.cfa_rule_->Handle(handler, address, Handler::kCFARegister))
+      return false;
+  } else if (cfa_rule_) {
+    // this RuleMap has a CFA rule but new_rules doesn't.
+    // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
+    // it's garbage input. The instruction interpreter should have
+    // detected this and warned, so take no action here.
+  } else if (new_rules.cfa_rule_) {
+    // This shouldn't be possible: NEW_RULES is some prior state, and
+    // there's no way to remove entries.
+    MOZ_ASSERT(0);
+  } else {
+    // Both CFA rules are empty. No action needed.
+  }
+
+  // Traverse the two maps in order by register number, and report
+  // whatever differences we find.
+  RuleByNumber::const_iterator old_it = registers_.begin();
+  RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
+  while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
+    if (old_it->first < new_it->first) {
+      // This RuleMap has an entry for old_it->first, but NEW_RULES
+      // doesn't.
+      //
+      // This isn't really the right thing to do, but since CFI generally
+      // only mentions callee-saves registers, and GCC's convention for
+      // callee-saves registers is that they are unchanged, it's a good
+      // approximation.
+      if (!handler->SameValueRule(address, old_it->first)) return false;
+      ++old_it;
+    } else if (old_it->first > new_it->first) {
+      // NEW_RULES has entry for new_it->first, but this RuleMap
+      // doesn't. This shouldn't be possible: NEW_RULES is some prior
+      // state, and there's no way to remove entries.
+      MOZ_ASSERT(0);
+      // If the assertion is compiled out, neither iterator would
+      // otherwise move and this loop would never end.  Step past the
+      // unexpected entry so the traversal always terminates.
+      ++new_it;
+    } else {
+      // Both maps have an entry for this register. Report the new
+      // rule if it is different.
+      if (*old_it->second != *new_it->second &&
+          !new_it->second->Handle(handler, address, new_it->first))
+        return false;
+      ++new_it;
+      ++old_it;
+    }
+  }
+  // Finish off entries from this RuleMap with no counterparts in new_rules.
+  while (old_it != registers_.end()) {
+    if (!handler->SameValueRule(address, old_it->first)) return false;
+    ++old_it;
+  }
+  // Since we only make transitions from a rule set to some previously
+  // saved rule set, and we can only add rules to the map, NEW_RULES
+  // must have no more rules than *this.
+  MOZ_ASSERT(new_it == new_rules.registers_.end());
+
+  return true;
+}
+
+// Remove all register rules and clear cfa_rule_.
+void CallFrameInfo::RuleMap::Clear() {
+  // Drop the CFA rule first.
+  delete cfa_rule_;
+  cfa_rule_ = nullptr;
+
+  // The map owns every Rule it points at, so free each one before
+  // emptying the map itself.
+  for (RuleByNumber::value_type& entry : registers_) delete entry.second;
+  registers_.clear();
+}
+
+// The state of the call frame information interpreter as it processes
+// instructions from a CIE and FDE.
+class CallFrameInfo::State {
+ public:
+  // Build an interpreter state that reads bytes with READER, reports
+  // rules to HANDLER and problems to REPORTER, and starts at code
+  // address ADDRESS.  No entry is selected and no rule stack exists yet.
+  State(ByteReader* reader, Handler* handler, Reporter* reporter,
+        uint64 address)
+      : reader_(reader),
+        handler_(handler),
+        reporter_(reporter),
+        address_(address),
+        entry_(nullptr),
+        cursor_(nullptr),
+        saved_rules_(nullptr) {}
+
+  // Deleting a null pointer is a no-op, so no guard is needed.
+  ~State() { delete saved_rules_; }
+
+  // Interpret instructions from CIE, save the resulting rule set for
+  // DW_CFA_restore instructions, and return true.  On error, report
+  // the problem to reporter_ and return false.
+  bool InterpretCIE(const CIE& cie);
+
+  // Interpret instructions from FDE, and return true.  On error,
+  // report the problem to reporter_ and return false.
+  bool InterpretFDE(const FDE& fde);
+
+ private:
+  // The operands of a CFI instruction, as filled in by ParseOperands.
+  struct Operands {
+    unsigned register_number;  // A register number.
+    uint64 offset;             // An offset or address.
+    long signed_offset;        // A signed offset.
+    string expression;         // A DWARF expression.
+  };
+
+  // Parse CFI instruction operands from STATE's instruction stream as
+  // described by FORMAT.  On success, populate OPERANDS with the
+  // results, and return true.  On failure, report the problem and
+  // return false.
+  //
+  // Each character of FORMAT should be one of the following:
+  //
+  //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
+  //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
+  //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
+  //   'a'  machine-size address            (OPERANDS->offset)
+  //        (If the CIE has a 'z' augmentation string, 'a' uses the
+  //        encoding specified by the 'R' argument.)
+  //   '1'  a one-byte offset               (OPERANDS->offset)
+  //   '2'  a two-byte offset               (OPERANDS->offset)
+  //   '4'  a four-byte offset              (OPERANDS->offset)
+  //   '8'  an eight-byte offset            (OPERANDS->offset)
+  //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
+  //        DWARF expression
+  bool ParseOperands(const char* format, Operands* operands);
+
+  // Interpret one CFI instruction from STATE's instruction stream,
+  // update STATE, report any rule changes to handler_, and return true.
+  // On failure, report the problem and return false.
+  bool DoInstruction();
+
+  // The Do* member functions below are subroutines of DoInstruction.
+  // Each factors out the actual work of an operation that has several
+  // different encodings.
+
+  // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
+  // return true.  On failure, report and return false.  (Used for
+  // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
+  bool DoDefCFA(unsigned base_register, long offset);
+
+  // Change the offset of the CFA rule to OFFSET, and return true.  On
+  // failure, report and return false.  (Subroutine for
+  // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
+  bool DoDefCFAOffset(long offset);
+
+  // Specify that REG can be recovered using RULE, and return true.  On
+  // failure, report and return false.
+  bool DoRule(unsigned reg, Rule* rule);
+
+  // Specify that REG can be found at OFFSET from the CFA, and return
+  // true.  On failure, report and return false.  (Subroutine for
+  // DW_CFA_offset, DW_CFA_offset_extended, and
+  // DW_CFA_offset_extended_sf.)
+  bool DoOffset(unsigned reg, long offset);
+
+  // Specify that the caller's value for REG is the CFA plus OFFSET,
+  // and return true.  On failure, report and return false.  (Subroutine
+  // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
+  bool DoValOffset(unsigned reg, long offset);
+
+  // Restore REG to the rule established in the CIE, and return true.  On
+  // failure, report and return false.  (Subroutine for DW_CFA_restore
+  // and DW_CFA_restore_extended.)
+  bool DoRestore(unsigned reg);
+
+  // Return the section offset of the instruction at cursor_.  For use
+  // in error messages.
+  uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
+
+  // Tell reporter_ that entry_ is incomplete, and return false so that
+  // callers can simply write `return ReportIncomplete();`.
+  bool ReportIncomplete() {
+    reporter_->Incomplete(entry_->offset, entry_->kind);
+    return false;
+  }
+
+  // For reading multi-byte values with the appropriate endianness.
+  ByteReader* reader_;
+
+  // The handler to which we should report the data we find.
+  Handler* handler_;
+
+  // For reporting problems in the info we're parsing.
+  Reporter* reporter_;
+
+  // The code address to which the next instruction in the stream applies.
+  uint64 address_;
+
+  // The entry whose instructions we are currently processing.  This is
+  // first a CIE, and then an FDE.
+  const Entry* entry_;
+
+  // The next instruction to process.
+  const char* cursor_;
+
+  // The current set of rules.
+  RuleMap rules_;
+
+  // The set of rules established by the CIE, used by DW_CFA_restore
+  // and DW_CFA_restore_extended.  We set this after interpreting the
+  // CIE's instructions.
+  RuleMap cie_rules_;
+
+  // A stack of saved states, for DW_CFA_remember_state and
+  // DW_CFA_restore_state.  Null until one is allocated; deleted by the
+  // destructor.
+  std::stack<RuleMap>* saved_rules_;
+};
+
+// Run every instruction in CIE, then snapshot the resulting rules into
+// cie_rules_.  Returns false as soon as an instruction fails.
+bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
+  entry_ = &cie;
+  // DoInstruction advances cursor_ itself, so the loop has no increment.
+  for (cursor_ = entry_->instructions; cursor_ < entry_->end;) {
+    if (!DoInstruction()) return false;
+  }
+  // Note the rules established by the CIE, for use by DW_CFA_restore
+  // and DW_CFA_restore_extended.
+  cie_rules_ = rules_;
+  return true;
+}
+
+// Run every instruction in FDE.  Returns false as soon as an
+// instruction fails, and true once the instructions are exhausted.
+bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
+  entry_ = &fde;
+  // DoInstruction advances cursor_ itself, so the loop has no increment.
+  for (cursor_ = entry_->instructions; cursor_ < entry_->end;) {
+    if (!DoInstruction()) return false;
+  }
+  return true;
+}
+
+// Consume one operand from the instruction stream for each character of
+// FORMAT, storing the results in OPERANDS and advancing cursor_.  If an
+// operand would extend past the end of the current entry, report the
+// entry as incomplete and return false; otherwise return true.
+bool CallFrameInfo::State::ParseOperands(const char* format,
+                                         Operands* operands) {
+  size_t consumed;
+
+  for (const char* spec = format; *spec; ++spec) {
+    // Bytes still available in the current entry before this operand.
+    const size_t remaining = entry_->end - cursor_;
+    switch (*spec) {
+      // Variable-length operands: decode first, then check that the
+      // decoded length stayed inside the entry.
+      case 'r':
+        operands->register_number =
+            reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case 'o':
+        operands->offset = reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case 's':
+        operands->signed_offset =
+            reader_->ReadSignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case 'a':
+        // Addresses use the pointer encoding recorded in the CIE.
+        operands->offset = reader_->ReadEncodedPointer(
+            cursor_, entry_->cie->pointer_encoding, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      // Fixed-width operands: the size is known up front, so check
+      // before reading.
+      case '1':
+        if (1 > remaining) return ReportIncomplete();
+        operands->offset = static_cast<unsigned char>(*cursor_);
+        cursor_ += 1;
+        break;
+
+      case '2':
+        if (2 > remaining) return ReportIncomplete();
+        operands->offset = reader_->ReadTwoBytes(cursor_);
+        cursor_ += 2;
+        break;
+
+      case '4':
+        if (4 > remaining) return ReportIncomplete();
+        operands->offset = reader_->ReadFourBytes(cursor_);
+        cursor_ += 4;
+        break;
+
+      case '8':
+        if (8 > remaining) return ReportIncomplete();
+        operands->offset = reader_->ReadEightBytes(cursor_);
+        cursor_ += 8;
+        break;
+
+      case 'e': {
+        // A block: an unsigned LEB128 byte count followed by that many
+        // bytes of expression.
+        const size_t block_size =
+            reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        if (block_size > remaining - consumed) return ReportIncomplete();
+        cursor_ += consumed;
+        operands->expression = string(cursor_, block_size);
+        cursor_ += block_size;
+        break;
+      }
+
+      default:
+        // FORMAT strings are written by us; anything else is a bug.
+        MOZ_ASSERT(0);
+    }
+  }
+
+  return true;
+}
+
+bool CallFrameInfo::State::DoInstruction() {
+ CIE* cie = entry_->cie;
+ Operands ops;
+
+ // Our entry's kind should have been set by now.
+ MOZ_ASSERT(entry_->kind != kUnknown);
+
+ // We shouldn't have been invoked unless there were more
+ // instructions to parse.
+ MOZ_ASSERT(cursor_ < entry_->end);
+
+ unsigned opcode = *cursor_++;
+ if ((opcode & 0xc0) != 0) {
+ switch (opcode & 0xc0) {
+ // Advance the address.
+ case DW_CFA_advance_loc: {
+ size_t code_offset = opcode & 0x3f;
+ address_ += code_offset * cie->code_alignment_factor;
+ break;
+ }
+
+ // Find a register at an offset from the CFA.
+ case DW_CFA_offset:
+ if (!ParseOperands("o", &ops) ||
+ !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // Restore the rule established for a register by the CIE.
+ case DW_CFA_restore:
+ if (!DoRestore(opcode & 0x3f)) return false;
+ break;
+
+ // The 'if' above should have excluded this possibility.
+ default:
+ MOZ_ASSERT(0);
+ }
+
+ // Return here, so the big switch below won't be indented.
+ return true;
+ }
+
+ switch (opcode) {
+ // Set the address.
+ case DW_CFA_set_loc:
+ if (!ParseOperands("a", &ops)) return false;
+ address_ = ops.offset;
+ break;
+
+ // Advance the address.
+ case DW_CFA_advance_loc1:
+ if (!ParseOperands("1", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Advance the address.
+ case DW_CFA_advance_loc2:
+ if (!ParseOperands("2", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Advance the address.
+ case DW_CFA_advance_loc4:
+ if (!ParseOperands("4", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Advance the address.
+ case DW_CFA_MIPS_advance_loc8:
+ if (!ParseOperands("8", &ops)) return false;
+ address_ += ops.offset * cie->code_alignment_factor;
+ break;
+
+ // Compute the CFA by adding an offset to a register.
+ case DW_CFA_def_cfa:
+ if (!ParseOperands("ro", &ops) ||
+ !DoDefCFA(ops.register_number, ops.offset))
+ return false;
+ break;
+
+ // Compute the CFA by adding an offset to a register.
+ case DW_CFA_def_cfa_sf:
+ if (!ParseOperands("rs", &ops) ||
+ !DoDefCFA(ops.register_number,
+ ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // Change the base register used to compute the CFA.
+ case DW_CFA_def_cfa_register: {
+ Rule* cfa_rule = rules_.CFARule();
+ if (!cfa_rule) {
+ reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ if (!ParseOperands("r", &ops)) return false;
+ cfa_rule->SetBaseRegister(ops.register_number);
+ if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister))
+ return false;
+ break;
+ }
+
+ // Change the offset used to compute the CFA.
+ case DW_CFA_def_cfa_offset:
+ if (!ParseOperands("o", &ops) || !DoDefCFAOffset(ops.offset))
+ return false;
+ break;
+
+ // Change the offset used to compute the CFA.
+ case DW_CFA_def_cfa_offset_sf:
+ if (!ParseOperands("s", &ops) ||
+ !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // Specify an expression whose value is the CFA.
+ case DW_CFA_def_cfa_expression: {
+ if (!ParseOperands("e", &ops)) return false;
+ Rule* rule = new ValExpressionRule(ops.expression);
+ rules_.SetCFARule(rule);
+ if (!rule->Handle(handler_, address_, Handler::kCFARegister))
+ return false;
+ break;
+ }
+
+ // The register's value cannot be recovered.
+ case DW_CFA_undefined: {
+ if (!ParseOperands("r", &ops) ||
+ !DoRule(ops.register_number, new UndefinedRule()))
+ return false;
+ break;
+ }
+
+ // The register's value is unchanged from its value in the caller.
+ case DW_CFA_same_value: {
+ if (!ParseOperands("r", &ops) ||
+ !DoRule(ops.register_number, new SameValueRule()))
+ return false;
+ break;
+ }
+
+ // Find a register at an offset from the CFA.
+ case DW_CFA_offset_extended:
+ if (!ParseOperands("ro", &ops) ||
+ !DoOffset(ops.register_number,
+ ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register is saved at an offset from the CFA.
+ case DW_CFA_offset_extended_sf:
+ if (!ParseOperands("rs", &ops) ||
+ !DoOffset(ops.register_number,
+ ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register is saved at an offset from the CFA.
+ case DW_CFA_GNU_negative_offset_extended:
+ if (!ParseOperands("ro", &ops) ||
+ !DoOffset(ops.register_number,
+ -ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register's value is the sum of the CFA plus an offset.
+ case DW_CFA_val_offset:
+ if (!ParseOperands("ro", &ops) ||
+ !DoValOffset(ops.register_number,
+ ops.offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register's value is the sum of the CFA plus an offset.
+ case DW_CFA_val_offset_sf:
+ if (!ParseOperands("rs", &ops) ||
+ !DoValOffset(ops.register_number,
+ ops.signed_offset * cie->data_alignment_factor))
+ return false;
+ break;
+
+ // The register has been saved in another register.
+ case DW_CFA_register: {
+ if (!ParseOperands("ro", &ops) ||
+ !DoRule(ops.register_number, new RegisterRule(ops.offset)))
+ return false;
+ break;
+ }
+
+ // An expression yields the address at which the register is saved.
+ case DW_CFA_expression: {
+ if (!ParseOperands("re", &ops) ||
+ !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
+ return false;
+ break;
+ }
+
+ // An expression yields the caller's value for the register.
+ case DW_CFA_val_expression: {
+ if (!ParseOperands("re", &ops) ||
+ !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
+ return false;
+ break;
+ }
+
+ // Restore the rule established for a register by the CIE.
+ case DW_CFA_restore_extended:
+ if (!ParseOperands("r", &ops) || !DoRestore(ops.register_number))
+ return false;
+ break;
+
+ // Save the current set of rules on a stack.
+ case DW_CFA_remember_state:
+ if (!saved_rules_) {
+ saved_rules_ = new std::stack<RuleMap>();
+ }
+ saved_rules_->push(rules_);
+ break;
+
+ // Pop the current set of rules off the stack.
+ case DW_CFA_restore_state: {
+ if (!saved_rules_ || saved_rules_->empty()) {
+ reporter_->EmptyStateStack(entry_->offset, entry_->kind,
+ CursorOffset());
+ return false;
+ }
+ const RuleMap& new_rules = saved_rules_->top();
+ if (rules_.CFARule() && !new_rules.CFARule()) {
+ reporter_->ClearingCFARule(entry_->offset, entry_->kind,
+ CursorOffset());
+ return false;
+ }
+ rules_.HandleTransitionTo(handler_, address_, new_rules);
+ rules_ = new_rules;
+ saved_rules_->pop();
+ break;
+ }
+
+ // No operation. (Padding instruction.)
+ case DW_CFA_nop:
+ break;
+
+ // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
+ // are saved in registers 24 through 31 (%i0-%i7), and registers
+ // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
+ // (0-15 * the register size). The register numbers must be
+ // hard-coded. A GNU extension, and not a pretty one.
+ case DW_CFA_GNU_window_save: {
+ // Save %o0-%o7 in %i0-%i7.
+ for (int i = 8; i < 16; i++)
+ if (!DoRule(i, new RegisterRule(i + 16))) return false;
+ // Save %l0-%l7 and %i0-%i7 at the CFA.
+ for (int i = 16; i < 32; i++)
+ // Assume that the byte reader's address size is the same as
+ // the architecture's register size. !@#%*^ hilarious.
+ if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
+ (i - 16) * reader_->AddressSize())))
+ return false;
+ break;
+ }
+
+ // I'm not sure what this is. GDB doesn't use it for unwinding.
+ case DW_CFA_GNU_args_size:
+ if (!ParseOperands("o", &ops)) return false;
+ break;
+
+ // An opcode we don't recognize.
+ default: {
+ reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Replace the current CFA rule with one that computes the CFA as the value
+// of BASE_REGISTER plus OFFSET, and report the new rule to the handler.
+// Returns the result of the rule's Handle call.
+bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
+  Rule* const new_cfa_rule = new ValOffsetRule(base_register, offset);
+  // NOTE(review): rules_ presumably takes ownership of the rule here ---
+  // confirm against RuleMap::SetCFARule.
+  rules_.SetCFARule(new_cfa_rule);
+  const bool handled =
+      new_cfa_rule->Handle(handler_, address_, Handler::kCFARegister);
+  return handled;
+}
+
+// Change only the offset of the CFA rule currently in effect to OFFSET and
+// report the updated rule to the handler. If no CFA rule has been set yet,
+// report that through reporter_ and return false.
+bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
+  if (Rule* const current_cfa = rules_.CFARule()) {
+    current_cfa->SetOffset(offset);
+    return current_cfa->Handle(handler_, address_, Handler::kCFARegister);
+  }
+
+  // There is nothing whose offset we could adjust.
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Record RULE as the rule for register REG in the current rule set, then
+// report it to the handler at the current address. Returns the result of
+// the rule's Handle call.
+bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) {
+  rules_.SetRegisterRule(reg, rule);
+  const bool handler_accepted = rule->Handle(handler_, address_, reg);
+  return handler_accepted;
+}
+
+// Establish an OffsetRule for register REG, relative to the CFA and using
+// OFFSET. This requires a CFA rule to be in effect; if there is none, the
+// problem is reported through reporter_ and false is returned.
+bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
+  if (rules_.CFARule()) {
+    return DoRule(reg, new OffsetRule(Handler::kCFARegister, offset));
+  }
+
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Establish a ValOffsetRule for register REG, relative to the CFA and using
+// OFFSET. This requires a CFA rule to be in effect; if there is none, the
+// problem is reported through reporter_ and false is returned.
+bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
+  if (rules_.CFARule()) {
+    return DoRule(reg, new ValOffsetRule(Handler::kCFARegister, offset));
+  }
+
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Reinstate, for register REG, the rule that the CIE established for it.
+// Used by DW_CFA_restore and DW_CFA_restore_extended. Returns false (after
+// reporting) if the instruction appears inside a CIE.
+bool CallFrameInfo::State::DoRestore(unsigned reg) {
+  // Restoring "the CIE's rule" is meaningless while we are still
+  // interpreting the CIE itself.
+  const bool in_cie = (entry_->kind == kCIE);
+  if (in_cie) {
+    reporter_->RestoreInCIE(entry_->offset, CursorOffset());
+    return false;
+  }
+
+  Rule* const cie_rule = cie_rules_.RegisterRule(reg);
+  // When the CIE said nothing about REG, fall back to "same value". This
+  // isn't strictly right, but CFI generally only mentions callee-saves
+  // registers, and GCC's convention for those is that they are unchanged,
+  // so it's a good approximation.
+  return DoRule(reg, cie_rule ? cie_rule : new SameValueRule());
+}
+
+// Read the header fields common to CIEs and FDEs, starting at CURSOR, and
+// fill in ENTRY's offset, start, end, kind, id, fields and cie members.
+//
+// Returns true on success; this includes recognizing an .eh_frame
+// terminator (kind == kTerminator). On failure the problem is reported and
+// false is returned; if ENTRY's extent could not be determined, entry->end
+// is left null so that the caller can tell it cannot skip to the next entry.
+bool CallFrameInfo::ReadEntryPrologue(const char* cursor, Entry* entry) {
+  const char* buffer_end = buffer_ + buffer_length_;
+
+  // Initialize enough of ENTRY for use in error reporting.
+  entry->offset = cursor - buffer_;
+  entry->start = cursor;
+  entry->kind = kUnknown;
+  entry->end = nullptr;
+
+  // Make sure the initial length field lies within the buffer *before*
+  // reading it, so that a truncated section or a CIE pointer aimed at the
+  // very end of the buffer cannot make us read past the end. The DWARF
+  // initial length is four bytes, or twelve when those four bytes hold the
+  // 64-bit escape value 0xffffffff.
+  // NOTE(review): this assumes ReadInitialLength follows the standard DWARF
+  // initial-length encoding --- confirm against ByteReader.
+  if (cursor > buffer_end) return ReportIncomplete(entry);
+  const size_t available = size_t(buffer_end - cursor);
+  if (available < 4) return ReportIncomplete(entry);
+  if (available < 12 && reader_->ReadFourBytes(cursor) == 0xffffffff)
+    return ReportIncomplete(entry);
+
+  // Read the initial length. This sets reader_'s offset size.
+  size_t length_size;
+  uint64 length = reader_->ReadInitialLength(cursor, &length_size);
+  if (length_size > available) return ReportIncomplete(entry);
+  cursor += length_size;
+
+  // In a .eh_frame section, a length of zero marks the end of the series
+  // of entries.
+  if (length == 0 && eh_frame_) {
+    entry->kind = kTerminator;
+    entry->end = cursor;
+    return true;
+  }
+
+  // Validate the length.
+  if (length > size_t(buffer_end - cursor)) return ReportIncomplete(entry);
+
+  // The length is the number of bytes after the initial length field;
+  // we have that position handy at this point, so compute the end
+  // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
+  // and the length didn't fit in a size_t, we would have rejected it
+  // above.)
+  entry->end = cursor + length;
+
+  // Parse the next field: either the offset of a CIE or a CIE id.
+  size_t offset_size = reader_->OffsetSize();
+  if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
+  entry->id = reader_->ReadOffset(cursor);
+
+  // Don't advance cursor past id field yet; in .eh_frame data we need
+  // the id's position to compute the section offset of an FDE's CIE.
+
+  // Now we can decide what kind of entry this is.
+  if (eh_frame_) {
+    // In .eh_frame data, an ID of zero marks the entry as a CIE, and
+    // anything else is an offset from the id field of the FDE to the start
+    // of the CIE.
+    if (entry->id == 0) {
+      entry->kind = kCIE;
+    } else {
+      entry->kind = kFDE;
+      // Turn the offset from the id into an offset from the buffer's start.
+      entry->id = (cursor - buffer_) - entry->id;
+    }
+  } else {
+    // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
+    // offset size for the entry) marks the entry as a CIE, and anything
+    // else is the offset of the CIE from the beginning of the section.
+    if (offset_size == 4)
+      entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
+    else {
+      MOZ_ASSERT(offset_size == 8);
+      entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
+    }
+  }
+
+  // Now advance cursor past the id.
+  cursor += offset_size;
+
+  // The fields specific to this kind of entry start here.
+  entry->fields = cursor;
+
+  // The caller links the entry to its CIE once that has been parsed.
+  entry->cie = nullptr;
+
+  return true;
+}
+
+// Parse the fields specific to a CIE, starting at cie->fields, and store
+// them in CIE: version, augmentation string, alignment factors, return
+// address register, 'z' augmentation data (if any), and the start of the
+// CIE's instructions.
+//
+// Returns true on success. On failure the problem is reported through
+// reporter_ and false is returned; the fields reset under "Prepare for
+// early exit" then hold their default values unless already parsed.
+bool CallFrameInfo::ReadCIEFields(CIE* cie) {
+  const char* cursor = cie->fields;
+  size_t len;
+
+  MOZ_ASSERT(cie->kind == kCIE);
+
+  // Prepare for early exit.
+  cie->version = 0;
+  cie->augmentation.clear();
+  cie->code_alignment_factor = 0;
+  cie->data_alignment_factor = 0;
+  cie->return_address_register = 0;
+  cie->has_z_augmentation = false;
+  cie->pointer_encoding = DW_EH_PE_absptr;
+  cie->instructions = 0;
+
+  // Parse the version number.
+  if (cie->end - cursor < 1) return ReportIncomplete(cie);
+  cie->version = reader_->ReadOneByte(cursor);
+  cursor++;
+
+  // If we don't recognize the version, we can't parse any more fields of the
+  // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
+  // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
+  // the difference between those versions seems to be the same as for
+  // .debug_frame.
+  if (cie->version < 1 || cie->version > 4) {
+    reporter_->UnrecognizedVersion(cie->offset, cie->version);
+    return false;
+  }
+
+  // The augmentation string is NUL-terminated and must end within the CIE.
+  const char* augmentation_start = cursor;
+  const void* augmentation_end =
+      memchr(augmentation_start, '\0', cie->end - augmentation_start);
+  if (!augmentation_end) return ReportIncomplete(cie);
+  cursor = static_cast<const char*>(augmentation_end);
+  cie->augmentation = string(augmentation_start, cursor - augmentation_start);
+  // Skip the terminating '\0'.
+  cursor++;
+
+  // Is this CFI augmented?
+  if (!cie->augmentation.empty()) {
+    // Is it an augmentation we recognize?
+    if (cie->augmentation[0] == DW_Z_augmentation_start) {
+      // Linux C++ ABI 'z' augmentation, used for exception handling data.
+      cie->has_z_augmentation = true;
+    } else {
+      // Not an augmentation we recognize. Augmentations can have arbitrary
+      // effects on the form of rest of the content, so we have to give up.
+      reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+      return false;
+    }
+  }
+
+  if (cie->version >= 4) {
+    // Check that the address_size and segment_size fields are plausible.
+    if (cie->end - cursor < 2) {
+      return ReportIncomplete(cie);
+    }
+    uint8_t address_size = reader_->ReadOneByte(cursor);
+    cursor++;
+    if (address_size != sizeof(void*)) {
+      // This is not per-se invalid CFI. But we can reasonably expect to
+      // be running on a target of the same word size as the CFI is for,
+      // so we reject this case.
+      reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid address_size");
+      return false;
+    }
+    uint8_t segment_size = reader_->ReadOneByte(cursor);
+    cursor++;
+    if (segment_size != 0) {
+      // This is also not per-se invalid CFI, but we don't currently handle
+      // the case of non-zero |segment_size|.
+      reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid segment_size");
+      return false;
+    }
+    // We only continue parsing if |segment_size| is zero. If this routine
+    // is ever changed to allow non-zero |segment_size|, then
+    // ReadFDEFields() below will have to be changed to match, per comments
+    // there.
+  }
+
+  // Parse the code alignment factor.
+  cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the data alignment factor.
+  cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the return address register. This is a ubyte in version 1, and
+  // a ULEB128 in version 3.
+  if (cie->version == 1) {
+    if (cursor >= cie->end) return ReportIncomplete(cie);
+    cie->return_address_register = uint8(*cursor++);
+  } else {
+    cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
+    if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+    cursor += len;
+  }
+
+  // If we have a 'z' augmentation string, find the augmentation data and
+  // use the augmentation string to parse it.
+  if (cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
+    // Check the two quantities separately: DATA_SIZE comes straight from the
+    // input and may be arbitrarily large, so computing |len + data_size|
+    // could wrap around and let a bogus size slip past the bounds check.
+    const size_t remaining = size_t(cie->end - cursor);
+    if (remaining < len || remaining - len < data_size)
+      return ReportIncomplete(cie);
+    cursor += len;
+    const char* data = cursor;
+    cursor += data_size;
+    const char* data_end = cursor;
+
+    cie->has_z_lsda = false;
+    cie->has_z_personality = false;
+    cie->has_z_signal_frame = false;
+
+    // Walk the augmentation string, and extract values from the
+    // augmentation data as the string directs.
+    for (size_t i = 1; i < cie->augmentation.size(); i++) {
+      switch (cie->augmentation[i]) {
+        case DW_Z_has_LSDA:
+          // The CIE's augmentation data holds the language-specific data
+          // area pointer's encoding, and the FDE's augmentation data holds
+          // the pointer itself.
+          cie->has_z_lsda = true;
+          // Fetch the LSDA encoding from the augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->lsda_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->lsda_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
+            return false;
+          }
+          // Don't check if the encoding is usable here --- we haven't
+          // read the FDE's fields yet, so we're not prepared for
+          // DW_EH_PE_funcrel, although that's a fine encoding for the
+          // LSDA to use, since it appears in the FDE.
+          break;
+
+        case DW_Z_has_personality_routine:
+          // The CIE's augmentation data holds the personality routine
+          // pointer's encoding, followed by the pointer itself.
+          cie->has_z_personality = true;
+          // Fetch the personality routine pointer's encoding from the
+          // augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->personality_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->personality_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->personality_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->personality_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->personality_encoding);
+            return false;
+          }
+          // Fetch the personality routine's pointer itself from the data.
+          cie->personality_address = reader_->ReadEncodedPointer(
+              data, cie->personality_encoding, &len);
+          if (len > size_t(data_end - data)) return ReportIncomplete(cie);
+          data += len;
+          break;
+
+        case DW_Z_has_FDE_address_encoding:
+          // The CIE's augmentation data holds the pointer encoding to use
+          // for addresses in the FDE.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->pointer_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->pointer_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->pointer_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->pointer_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->pointer_encoding);
+            return false;
+          }
+          break;
+
+        case DW_Z_is_signal_trampoline:
+          // Frames using this CIE are signal delivery frames.
+          cie->has_z_signal_frame = true;
+          break;
+
+        default:
+          // An augmentation we don't recognize.
+          reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+          return false;
+      }
+    }
+  }
+
+  // The CIE's instructions start here.
+  cie->instructions = cursor;
+
+  return true;
+}
+
+// Parse the fields specific to an FDE, starting at fde->fields: the start
+// address, the size of the covered range, any 'z' augmentation data (of
+// which only the LSDA pointer is interpreted), and the start of the FDE's
+// instructions. fde->cie must already point to the FDE's parsed CIE.
+//
+// As a side effect this calls reader_->SetFunctionBase with the FDE's
+// start address. Returns true on success; on failure the problem is
+// reported and false is returned.
+bool CallFrameInfo::ReadFDEFields(FDE* fde) {
+  const char* cursor = fde->fields;
+  size_t size;
+
+  // At this point, for Dwarf 4 and above, we are assuming that the
+  // associated CIE has its |segment_size| field equal to zero. This is
+  // checked for in ReadCIEFields() above. If ReadCIEFields() is ever
+  // changed to allow non-zero |segment_size| CIEs then we will have to read
+  // the segment_selector value at this point.
+
+  fde->address =
+      reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, &size);
+  if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde);
+  cursor += size;
+  reader_->SetFunctionBase(fde->address);
+
+  // For the length, we strip off the upper nybble of the encoding used for
+  // the starting address.
+  DwarfPointerEncoding length_encoding =
+      DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
+  fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
+  if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde);
+  cursor += size;
+
+  // If the CIE has a 'z' augmentation string, then augmentation data
+  // appears here.
+  if (fde->cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
+    // Check the two quantities separately: DATA_SIZE comes straight from the
+    // input and may be arbitrarily large, so computing |size + data_size|
+    // could wrap around and let a bogus size slip past the bounds check.
+    const size_t remaining = size_t(fde->end - cursor);
+    if (remaining < size || remaining - size < data_size)
+      return ReportIncomplete(fde);
+    cursor += size;
+
+    // In the abstract, we should walk the augmentation string, and extract
+    // items from the FDE's augmentation data as we encounter augmentation
+    // string characters that specify their presence: the ordering of items
+    // in the augmentation string determines the arrangement of values in
+    // the augmentation data.
+    //
+    // In practice, there's only ever one value in FDE augmentation data
+    // that we support --- the LSDA pointer --- and we have to bail if we
+    // see any unrecognized augmentation string characters. So if there is
+    // anything here at all, we know what it is, and where it starts.
+    if (fde->cie->has_z_lsda) {
+      // Check whether the LSDA's pointer encoding is usable now: only once
+      // we've parsed the FDE's starting address do we call reader_->
+      // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
+      // usable.
+      if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
+        reporter_->UnusablePointerEncoding(fde->cie->offset,
+                                           fde->cie->lsda_encoding);
+        return false;
+      }
+
+      fde->lsda_address =
+          reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
+      if (size > data_size) return ReportIncomplete(fde);
+      // Ideally, we would also complain here if there were unconsumed
+      // augmentation data.
+    }
+
+    // Skip the whole augmentation data block, whether or not we used it.
+    cursor += data_size;
+  }
+
+  // The FDE's instructions start after those.
+  fde->instructions = cursor;
+
+  return true;
+}
+
+// Walk every entry in buffer_, skipping CIEs and offering each FDE (with
+// the data from its CIE) to handler_. For each FDE the handler accepts, the
+// CIE's and then the FDE's instructions are interpreted and handler_->End()
+// is called.
+//
+// Returns true if every entry was processed without error; an entry the
+// handler declines via Entry() is not an error. Processing stops early if
+// an entry's extent cannot be determined or an .eh_frame terminator is
+// seen.
+bool CallFrameInfo::Start() {
+  const char* buffer_end = buffer_ + buffer_length_;
+  const char* cursor;
+  bool all_ok = true;
+  const char* entry_end;
+  bool ok;
+
+  // Traverse all the entries in buffer_, skipping CIEs and offering
+  // FDEs to the handler.
+  for (cursor = buffer_; cursor < buffer_end;
+       cursor = entry_end, all_ok = all_ok && ok) {
+    FDE fde;
+
+    // Make it easy to skip this entry with 'continue': assume that
+    // things are not okay until we've checked all the data, and
+    // prepare the address of the next entry.
+    ok = false;
+
+    // Read the entry's prologue.
+    if (!ReadEntryPrologue(cursor, &fde)) {
+      if (!fde.end) {
+        // If we couldn't even figure out this entry's extent, then we
+        // must stop processing entries altogether.
+        all_ok = false;
+        break;
+      }
+      entry_end = fde.end;
+      continue;
+    }
+
+    // The next iteration picks up after this entry.
+    entry_end = fde.end;
+
+    // Did we see an .eh_frame terminating mark?
+    if (fde.kind == kTerminator) {
+      // If there appears to be more data left in the section after the
+      // terminating mark, warn the user. But this is just a warning;
+      // we leave all_ok true.
+      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
+      break;
+    }
+
+    // In this loop, we skip CIEs. We only parse them fully when we
+    // parse an FDE that refers to them. This limits our memory
+    // consumption (beyond the buffer itself) to that needed to
+    // process the largest single entry.
+    if (fde.kind != kFDE) {
+      ok = true;
+      continue;
+    }
+
+    // Validate the CIE pointer. An offset equal to the buffer length is
+    // rejected too: no entry can start at the very end of the buffer, and
+    // accepting it would have ReadEntryPrologue start reading there.
+    if (fde.id >= buffer_length_) {
+      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
+      continue;
+    }
+
+    CIE cie;
+
+    // Parse this FDE's CIE header.
+    if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) continue;
+    // This had better be an actual CIE.
+    if (cie.kind != kCIE) {
+      reporter_->BadCIEId(fde.offset, fde.id);
+      continue;
+    }
+    if (!ReadCIEFields(&cie)) continue;
+
+    // We now have the values that govern both the CIE and the FDE.
+    cie.cie = &cie;
+    fde.cie = &cie;
+
+    // Parse the FDE's header.
+    if (!ReadFDEFields(&fde)) continue;
+
+    // Call Entry to ask the consumer if they're interested.
+    if (!handler_->Entry(fde.offset, fde.address, fde.size, cie.version,
+                         cie.augmentation, cie.return_address_register)) {
+      // The handler isn't interested in this entry. That's not an error.
+      ok = true;
+      continue;
+    }
+
+    if (cie.has_z_augmentation) {
+      // Report the personality routine address, if we have one.
+      if (cie.has_z_personality) {
+        if (!handler_->PersonalityRoutine(
+                cie.personality_address,
+                IsIndirectEncoding(cie.personality_encoding)))
+          continue;
+      }
+
+      // Report the language-specific data area address, if we have one.
+      if (cie.has_z_lsda) {
+        if (!handler_->LanguageSpecificDataArea(
+                fde.lsda_address, IsIndirectEncoding(cie.lsda_encoding)))
+          continue;
+      }
+
+      // If this is a signal-handling frame, report that.
+      if (cie.has_z_signal_frame) {
+        if (!handler_->SignalHandler()) continue;
+      }
+    }
+
+    // Interpret the CIE's instructions, and then the FDE's instructions.
+    State state(reader_, handler_, reporter_, fde.address);
+    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
+
+    // Tell the ByteReader that the function start address from the
+    // FDE header is no longer valid.
+    reader_->ClearFunctionBase();
+
+    // Report the end of the entry.
+    handler_->End();
+  }
+
+  return all_ok;
+}
+
+// Return a human-readable name for KIND, for use in diagnostic messages.
+// Any kind other than kUnknown, kCIE and kFDE is expected to be
+// kTerminator (asserted).
+const char* CallFrameInfo::KindName(EntryKind kind) {
+  switch (kind) {
+    case CallFrameInfo::kUnknown:
+      return "entry";
+    case CallFrameInfo::kCIE:
+      return "common information entry";
+    case CallFrameInfo::kFDE:
+      return "frame description entry";
+    default:
+      MOZ_ASSERT(kind == CallFrameInfo::kTerminator);
+      return ".eh_frame sequence terminator";
+  }
+}
+
+// Tell the reporter that ENTRY ended before all of its data could be read.
+// Always returns false, so that parsing code can simply write
+// |return ReportIncomplete(entry);|.
+bool CallFrameInfo::ReportIncomplete(Entry* entry) {
+  const auto entry_offset = entry->offset;
+  const auto entry_kind = entry->kind;
+  reporter_->Incomplete(entry_offset, entry_kind);
+  return false;
+}
+
+// Log that the CFI entry of the given KIND at OFFSET ends before all of
+// its data could be read.
+void CallFrameInfo::Reporter::Incomplete(uint64 offset,
+                                         CallFrameInfo::EntryKind kind) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+  const char* const kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
+                 file, kind_name, offset, section);
+  log_(message);
+}
+
+// Log that an end-of-data marker was found at OFFSET although the section
+// has more contents after it.
+void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
+                 " before end of section contents\n",
+                 file, offset, section);
+  log_(message);
+}
+
+// Log that the FDE at OFFSET refers to a CIE at CIE_OFFSET, which lies
+// outside the section being parsed.
+void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
+                                                   uint64 cie_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE pointer is out of range: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Log that the FDE at OFFSET refers to CIE_OFFSET, but the entry found
+// there is not a CIE.
+void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE pointer does not point to a CIE: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Log that the entry at OFFSET specifies a CIE version, VERSION, that this
+// parser does not handle.
+// NOTE(review): the message text says "frame description entry", although
+// ReadCIEFields passes the CIE's own offset here.
+void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized version: %d\n",
+                 file, offset, section, version);
+  log_(message);
+}
+
+// Log that the entry at OFFSET uses the augmentation string AUG, which
+// this parser does not recognize.
+void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
+                                                       const string& aug) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+  const char* const augmentation = aug.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized augmentation: '%s'\n",
+                 file, offset, section, augmentation);
+  log_(message);
+}
+
+// Log that the entry at OFFSET contains a DWARF 4 field this parser
+// rejects; WHAT is a short description of the problem, of which at most
+// the first 100 characters are printed.
+void CallFrameInfo::Reporter::InvalidDwarf4Artefact(uint64 offset,
+                                                    const char* what) {
+  char buf[300];
+  // Bound WHAT with a precision in the format string instead of making a
+  // truncated heap copy: the output is the same, but there is no allocation
+  // that could fail and hand a null pointer to "%s".
+  SprintfLiteral(buf,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies invalid Dwarf4 artefact: %.100s\n",
+                 filename_.c_str(), offset, section_.c_str(), what);
+  log_(buf);
+}
+
+// Log that the CIE at OFFSET has 'z' augmentation data naming a pointer
+// encoding, ENCODING, that is not valid.
+void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
+                                                     uint8 encoding) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies invalid pointer encoding: "
+                 "0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Log that the CIE at OFFSET has 'z' augmentation data naming a pointer
+// encoding, ENCODING, for which no base address is available.
+void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
+                                                      uint8 encoding) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies a pointer encoding for which"
+                 " we have no base address: 0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Log that the CIE at OFFSET contains a DW_CFA_restore instruction, at
+// INSN_OFFSET, which is not allowed inside a CIE.
+void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " the DW_CFA_restore instruction at offset 0x%llx"
+                 " cannot be used in a common information entry\n",
+                 file, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the entry of the given KIND at OFFSET contains an instruction,
+// at INSN_OFFSET, whose opcode is not recognized.
+void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
+                                             CallFrameInfo::EntryKind kind,
+                                             uint64 insn_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+  const char* const kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the instruction at offset 0x%llx is unrecognized\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the instruction at INSN_OFFSET, in the entry of the given KIND
+// at OFFSET, needs a CFA rule to be in effect, but none has been set.
+void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
+                                        CallFrameInfo::EntryKind kind,
+                                        uint64 insn_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+  const char* const kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the instruction at offset 0x%llx assumes that a CFA rule "
+                 "has been set, but none has been set\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the DW_CFA_restore_state instruction at INSN_OFFSET, in the
+// entry of the given KIND at OFFSET, found no saved state to pop.
+void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+  const char* const kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the DW_CFA_restore_state instruction at offset 0x%llx"
+                 " should pop a saved state from the stack, but the stack "
+                 "is empty\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the DW_CFA_restore_state instruction at INSN_OFFSET, in the
+// entry of the given KIND at OFFSET, would remove the CFA rule currently
+// in effect.
+void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  const char* const file = filename_.c_str();
+  const char* const section = section_.c_str();
+  const char* const kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the DW_CFA_restore_state instruction at offset 0x%llx"
+                 " would clear the CFA rule in effect\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Return the number of entries in the i386 register table below. The
+// names are kept only as a comment; just the count is returned.
+unsigned int DwarfCFIToModule::RegisterNames::I386() {
+  /*
+   8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi",
+   3 "$eip", "$eflags", "$unused1",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   2 "$unused2", "$unused3",
+   8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   3 "$fcw", "$fsw", "$mxcsr",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5",
+   2 "$tr", "$ldtr"
+  */
+  const unsigned int general = 8;   // $eax .. $edi
+  const unsigned int control = 3;   // $eip, $eflags, $unused1
+  const unsigned int x87 = 8;       // $st0 .. $st7
+  const unsigned int unused = 2;    // $unused2, $unused3
+  const unsigned int sse = 8;       // $xmm0 .. $xmm7
+  const unsigned int mmx = 8;       // $mm0 .. $mm7
+  const unsigned int fp_status = 3; // $fcw, $fsw, $mxcsr
+  const unsigned int segment = 8;   // $es .. $unused5
+  const unsigned int task = 2;      // $tr, $ldtr
+  return general + control + x87 + unused + sse + mmx + fp_status + segment +
+         task;
+}
+
+// Return the number of entries in the x86-64 register table below. The
+// names are kept only as a comment; just the count is returned.
+unsigned int DwarfCFIToModule::RegisterNames::X86_64() {
+  /*
+   8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp",
+   8 "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15",
+   1 "$rip",
+   8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   1 "$rflags",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2",
+   4 "$fs.base", "$gs.base", "$unused3", "$unused4",
+   2 "$tr", "$ldtr",
+   3 "$mxcsr", "$fcw", "$fsw"
+  */
+  const unsigned int legacy_general = 8;    // $rax .. $rsp
+  const unsigned int extended_general = 8;  // $r8 .. $r15
+  const unsigned int instruction_ptr = 1;   // $rip
+  const unsigned int sse_low = 8;           // $xmm0 .. $xmm7
+  const unsigned int sse_high = 8;          // $xmm8 .. $xmm15
+  const unsigned int x87 = 8;               // $st0 .. $st7
+  const unsigned int mmx = 8;               // $mm0 .. $mm7
+  const unsigned int flags = 1;             // $rflags
+  const unsigned int segment = 8;           // $es .. $unused2
+  const unsigned int segment_base = 4;      // $fs.base .. $unused4
+  const unsigned int task = 2;              // $tr, $ldtr
+  const unsigned int fp_status = 3;         // $mxcsr, $fcw, $fsw
+  return legacy_general + extended_general + instruction_ptr + sse_low +
+         sse_high + x87 + mmx + flags + segment + segment_base + task +
+         fp_status;
+}
+
+// Return the number of entries in the ARM register table below, which
+// follows ARM IHI 0040A, section 3.1. The names are kept only as a
+// comment; just the count is returned.
+unsigned int DwarfCFIToModule::RegisterNames::ARM() {
+  /*
+   8 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+   8 "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc",
+   8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
+   8 "fps", "cpsr", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+   8 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+   8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+   8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+   8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"
+  */
+  const unsigned int table_rows = 13;
+  const unsigned int names_per_row = 8;
+  return table_rows * names_per_row;
+}
+
+// Return the number of entries in the ARM64 register table below, which
+// follows ARM IHI 0057A, section 3.1. The names are kept only as a
+// comment; just the count is returned.
+unsigned int DwarfCFIToModule::RegisterNames::ARM64() {
+  /*
+   8 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+   8 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+   8 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+   8 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+   8 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+   8 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+   8 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+  */
+  const unsigned int table_rows = 12;
+  const unsigned int names_per_row = 8;
+  return table_rows * names_per_row;
+}
+
+// Return the number of entries in the MIPS register table below. The
+// names are kept only as a comment; just the count is returned.
+unsigned int DwarfCFIToModule::RegisterNames::MIPS() {
+  /*
+   8 "$zero", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3",
+   8 "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
+   8 "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7",
+   8 "$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra",
+   9 "$lo", "$hi", "$pc", "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
+   8 "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", "$f12", "$f13",
+   7 "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20",
+   7 "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27",
+   6 "$f28", "$f29", "$f30", "$f31", "$fcsr", "$fir"
+  */
+  // One term per row of the table above, in the same order.
+  const unsigned int general_rows = 8 + 8 + 8 + 8;  // $zero .. $ra
+  const unsigned int remaining_rows = 9 + 8 + 7 + 7 + 6;  // $lo .. $fir
+  return general_rows + remaining_rows;
+}
+
+// See prototype for comments.
+//
+// Translates the DWARF expression held in |expr| into PfxInstrs appended
+// to |summ|: a PX_Start marker (its operand is 1 if |pushCfaAtStart|, else
+// 0), the instructions for each DWARF operator, an extra PX_Deref if
+// |derefAtEnd|, and finally PX_End.  Multi-byte operands are decoded with
+// |reader|.  If |debug| is set, progress is reported through summ->Log().
+//
+// Returns the value AddPfxInstr() gave for the PX_Start marker on success.
+// Returns -1 if the expression contains an operator that is not handled
+// here, an operand that does not fit in 32 signed bits, or an operand
+// that is truncated by the end of the expression.  Instructions appended
+// before a failure was detected are not removed from |summ|.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, string expr,
+                       bool debug, bool pushCfaAtStart, bool derefAtEnd) {
+  const char* cursor = expr.c_str();
+  const char* end1 = cursor + expr.length();
+
+  char buf[100];
+  if (debug) {
+    SprintfLiteral(buf, "LUL.DW << DwarfExpr, len is %d\n",
+                   (int)(end1 - cursor));
+    summ->Log(buf);
+  }
+
+  // Add a marker for the start of this expression.  In it, indicate
+  // whether or not the CFA should be pushed onto the stack prior to
+  // evaluation.
+  int32_t start_ix =
+      summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0));
+  MOZ_ASSERT(start_ix >= 0);
+
+  while (cursor < end1) {
+    uint8 opc = reader->ReadOneByte(cursor);
+    cursor++;
+
+    const char* nm = nullptr;
+    PfxExprOp pxop = PX_End;
+
+    switch (opc) {
+      case DW_OP_lit0 ... DW_OP_lit31: {
+        int32_t simm32 = (int32_t)(opc - DW_OP_lit0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW DW_OP_lit%d\n", (int)simm32);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32));
+        break;
+      }
+
+      case DW_OP_breg0 ... DW_OP_breg31: {
+        // The operand is a signed LEB128.  The reader is not told where
+        // the expression ends, so check here that the operand's final
+        // byte (the one with the top bit clear) lies inside the
+        // expression, and that the operand is no longer than the 10
+        // bytes a 64-bit value can need.
+        const char* last = cursor;
+        while (last < end1 && (*last & 0x80) != 0) {
+          last++;
+        }
+        if (last >= end1 || last - cursor >= 10) goto fail;
+
+        size_t len;
+        int64_t n = reader->ReadSignedLEB128(cursor, &len);
+        cursor += len;
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW DW_OP_breg%d %lld\n", (int)reg,
+                         (long long int)n);
+          summ->Log(buf);
+        }
+        // PfxInstr only allows a 32 bit signed offset.  So we
+        // must fail if the immediate is out of range.
+        if (n < INT32_MIN || INT32_MAX < n) goto fail;
+        (void)summ->AddPfxInstr(PfxInstr(PX_DwReg, reg));
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n));
+        (void)summ->AddPfxInstr(PfxInstr(PX_Add));
+        break;
+      }
+
+      case DW_OP_const4s: {
+        // Refuse to read a four byte operand that runs off the end of
+        // the expression.
+        if (end1 - cursor < 4) goto fail;
+
+        uint64_t u64 = reader->ReadFourBytes(cursor);
+        cursor += 4;
+        // u64 is guaranteed by |ReadFourBytes| to be in the
+        // range 0 .. FFFFFFFF inclusive.  But to be safe:
+        uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF);
+        int32_t s32 = (int32_t)u32;
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW DW_OP_const4s %d\n", (int)s32);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, s32));
+        break;
+      }
+
+      // Operators without operands: each maps to exactly one PfxExprOp,
+      // and they share the emission code at |no_operands|.
+      case DW_OP_deref:
+        nm = "deref";
+        pxop = PX_Deref;
+        goto no_operands;
+      case DW_OP_and:
+        nm = "and";
+        pxop = PX_And;
+        goto no_operands;
+      case DW_OP_plus:
+        nm = "plus";
+        pxop = PX_Add;
+        goto no_operands;
+      case DW_OP_minus:
+        nm = "minus";
+        pxop = PX_Sub;
+        goto no_operands;
+      case DW_OP_shl:
+        nm = "shl";
+        pxop = PX_Shl;
+        goto no_operands;
+      case DW_OP_ge:
+        nm = "ge";
+        pxop = PX_CmpGES;
+        goto no_operands;
+      no_operands:
+        MOZ_ASSERT(nm && pxop != PX_End);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW DW_OP_%s\n", nm);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(pxop));
+        break;
+
+      default:
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW unknown opc %d\n", (int)opc);
+          summ->Log(buf);
+        }
+        goto fail;
+
+    }  // switch (opc)
+
+  }  // while (cursor < end1)
+
+  MOZ_ASSERT(cursor >= end1);
+
+  // The operand checks above should make this unreachable; it is kept as
+  // a last line of defence.
+  if (cursor > end1) {
+    // We overran the Dwarf expression.  Give up.
+    goto fail;
+  }
+
+  // For DW_CFA_expression, what the expression denotes is the address
+  // of where the previous value is located.  The caller of this routine
+  // may therefore request one last dereference before the end marker is
+  // inserted.
+  if (derefAtEnd) {
+    (void)summ->AddPfxInstr(PfxInstr(PX_Deref));
+  }
+
+  // Insert an end marker, and declare success.
+  (void)summ->AddPfxInstr(PfxInstr(PX_End));
+  if (debug) {
+    SprintfLiteral(buf,
+                   "LUL.DW conversion of dwarf expression succeeded, "
+                   "ix = %d\n",
+                   (int)start_ix);
+    summ->Log(buf);
+    summ->Log("LUL.DW >>\n");
+  }
+  return start_ix;
+
+fail:
+  if (debug) {
+    summ->Log("LUL.DW conversion of dwarf expression failed\n");
+    summ->Log("LUL.DW >>\n");
+  }
+  return -1;
+}
+
+// Begins processing of one call frame entry covering |length| bytes from
+// |address|.  |offset| is remembered in entry_offset_, which the rule
+// handlers hand to the reporter when they have something to complain
+// about.  |return_address| is the number of the return address column.
+// |version| and |augmentation| are not examined.  Always returns true.
+bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length,
+                             uint8 version, const string& augmentation,
+                             unsigned return_address) {
+  if (DEBUG_DWARF) {
+    char buf[100];
+    SprintfLiteral(buf, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n",
+                   address, length);
+    summ_->Log(buf);
+  }
+
+  summ_->Entry(address, length);
+
+  // If dwarf2reader::CallFrameInfo can handle this version and
+  // augmentation, then we should be okay with that, so there's no
+  // need to check them here.
+
+  // Get ready to collect entries.  Record the entry's offset too, so that
+  // reports made while handling this entry's rules carry the right value
+  // rather than whatever entry_offset_ held before.
+  entry_offset_ = offset;
+  return_address_ = return_address;
+
+  // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI
+  // may not establish any rule for .ra if the return address column
+  // is an ordinary register, and that register holds the return
+  // address on entry to the function.  So establish an initial .ra
+  // rule citing the return address register.
+  if (return_address_ < num_dw_regs_) {
+    summ_->Rule(address, return_address_, NODEREF, return_address, 0);
+  }
+
+  return true;
+}
+
+// Produces the unique string naming register number |i|: ".cfa" for a
+// negative number, ".ra" for the current return address column, and
+// "dwarf_reg_<n>" for anything else.
+const UniqueString* DwarfCFIToModule::RegisterName(int i) {
+  // The only negative register number expected here is kCFARegister.
+  if (i < 0) {
+    MOZ_ASSERT(i == kCFARegister);
+    return usu_->ToUniqueString(".cfa");
+  }
+
+  const unsigned regNo = i;
+  if (regNo != return_address_) {
+    char name[30];
+    SprintfLiteral(name, "dwarf_reg_%u", regNo);
+    return usu_->ToUniqueString(name);
+  }
+  return usu_->ToUniqueString(".ra");
+}
+
+bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) {
+ reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg));
+ // Treat this as a non-fatal error.
+ return true;
+}
+
+// Handles a "same value" rule: from |address| on, the previous value of
+// |reg| is |reg| itself.  Passed to the summariser as the undereferenced
+// expression |reg| + 0.  Always returns true.
+bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) {
+  if (DEBUG_DWARF) {
+    char msg[100];
+    SprintfLiteral(msg, "LUL.DW 0x%llx: old r%d = Same\n", address, reg);
+    summ_->Log(msg);
+  }
+
+  summ_->Rule(address, reg, NODEREF, /* base register */ reg, 0);
+  return true;
+}
+
+// Handles an "offset" rule: from |address| on, the previous value of |reg|
+// is found in memory at |base_register| + |offset|.  Passed to the
+// summariser as a DEREF rule.  Always returns true.
+bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, int base_register,
+                                  long offset) {
+  if (DEBUG_DWARF) {
+    char msg[100];
+    SprintfLiteral(msg, "LUL.DW 0x%llx: old r%d = *(r%d + %ld)\n", address,
+                   reg, base_register, offset);
+    summ_->Log(msg);
+  }
+
+  // I.e. *(base_register + offset).
+  summ_->Rule(address, reg, DEREF, base_register, offset);
+  return true;
+}
+
+// Handles a "val_offset" rule: from |address| on, the previous value of
+// |reg| is the sum |base_register| + |offset| itself, with no memory
+// access.  Passed to the summariser as a NODEREF rule.  Always returns
+// true.
+bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, int base_register,
+                                     long offset) {
+  if (DEBUG_DWARF) {
+    char msg[100];
+    SprintfLiteral(msg, "LUL.DW 0x%llx: old r%d = r%d + %ld\n", address, reg,
+                   base_register, offset);
+    summ_->Log(msg);
+  }
+
+  // I.e. base_register + offset.
+  summ_->Rule(address, reg, NODEREF, base_register, offset);
+  return true;
+}
+
+// Handles a "register" rule: from |address| on, the previous value of
+// |reg| is the value of |base_register|.  Passed to the summariser as the
+// undereferenced expression |base_register| + 0.  Always returns true.
+bool DwarfCFIToModule::RegisterRule(uint64 address, int reg,
+                                    int base_register) {
+  if (DEBUG_DWARF) {
+    char msg[100];
+    SprintfLiteral(msg, "LUL.DW 0x%llx: old r%d = r%d\n", address, reg,
+                   base_register);
+    summ_->Log(msg);
+  }
+
+  summ_->Rule(address, reg, NODEREF, base_register, /* offset */ 0);
+  return true;
+}
+
+// Handles an "expression" rule: |expression| is converted by
+// parseDwarfExpr(), asking for the CFA to be pushed first and for a final
+// dereference, and the result is installed as a PFXEXPR rule for |reg| at
+// |address|.  If the conversion fails, the reporter is told and no rule
+// is installed.  Returns true in both cases.
+bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg,
+                                      const string& expression) {
+  const bool debug = !!DEBUG_DWARF;
+  const int32_t start_ix =
+      parseDwarfExpr(summ_, reader_, expression, debug,
+                     true /*pushCfaAtStart*/, true /*derefAtEnd*/);
+
+  if (start_ix < 0) {
+    // Parsing of the Dwarf expression failed.  Treat this as a
+    // non-fatal error, hence return |true| even on this path.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_,
+                                              RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, start_ix);
+  return true;
+}
+
+// Handles a "val_expression" rule: |expression| is converted by
+// parseDwarfExpr(), asking for the CFA to be pushed first but for no
+// final dereference, and the result is installed as a PFXEXPR rule for
+// |reg| at |address|.  If the conversion fails, the reporter is told and
+// no rule is installed.  Returns true in both cases.
+bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg,
+                                         const string& expression) {
+  const bool debug = !!DEBUG_DWARF;
+  const int32_t start_ix =
+      parseDwarfExpr(summ_, reader_, expression, debug,
+                     true /*pushCfaAtStart*/, false /*!derefAtEnd*/);
+
+  if (start_ix < 0) {
+    // Parsing of the Dwarf expression failed.  Treat this as a
+    // non-fatal error, hence return |true| even on this path.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_,
+                                              RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, start_ix);
+  return true;
+}
+
+// Finishes the current entry: tells the summariser that the entry has
+// ended, logging the call first when DEBUG_DWARF is on.  Always returns
+// true.
+bool DwarfCFIToModule::End() {
+  // module_->AddStackFrameEntry(entry_);
+  if (DEBUG_DWARF) {
+    summ_->Log("LUL.DW DwarfCFIToModule::End()\n");
+  }
+
+  summ_->End();
+
+  return true;
+}
+
+void DwarfCFIToModule::Reporter::UndefinedNotSupported(
+ size_t offset, const UniqueString* reg) {
+ char buf[300];
+ SprintfLiteral(buf, "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n");
+ log_(buf);
+ // BPLOG(INFO) << file_ << ", section '" << section_
+ // << "': the call frame entry at offset 0x"
+ // << std::setbase(16) << offset << std::setbase(10)
+ // << " sets the rule for register '" << FromUniqueString(reg)
+ // << "' to 'undefined', but the Breakpad symbol file format cannot "
+ // << " express this";
+}
+
+// FIXME: move this somewhere sensible
+//
+// Returns true if at most one bit of |n| is set.  That is, if |n| is a
+// power of two, or zero.
+static bool is_power_of_2(uint64_t n) {
+  // Subtracting one flips the lowest set bit and every bit below it, so
+  // the AND is zero exactly when there was no other set bit.  For zero the
+  // subtraction wraps to all-ones and the AND is again zero.
+  return (n & (n - 1)) == 0;
+}
+
+// Counts calls and logs a message, through log_, only when the running
+// count passes is_power_of_2(), so the log thins out as complaints
+// accumulate.  Neither |offset| nor |reg| appears in the message.
+void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised(
+    size_t offset, const UniqueString* reg) {
+  static uint64_t n_complaints = 0;  // This isn't threadsafe
+  ++n_complaints;
+
+  if (is_power_of_2(n_complaints)) {
+    char message[300];
+    SprintfLiteral(message,
+                   "DwarfCFIToModule::Reporter::"
+                   "ExpressionCouldNotBeSummarised(shown %llu times)\n",
+                   (unsigned long long int)n_complaints);
+    log_(message);
+  }
+}
+
+} // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulDwarfExt.h b/mozglue/baseprofiler/lul/LulDwarfExt.h
new file mode 100644
index 0000000000..dcd2500e5a
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfExt.h
@@ -0,0 +1,1289 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright 2006, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/dwarf/types.h
+// src/common/dwarf/dwarf2enums.h
+// src/common/dwarf/bytereader.h
+// src/common/dwarf_cfi_to_module.h
+// src/common/dwarf/dwarf2reader.h
+
+#ifndef LulDwarfExt_h
+#define LulDwarfExt_h
+
+#include "LulDwarfSummariser.h"
+
+#include "mozilla/Assertions.h"
+
+#include <stdint.h>
+#include <string>
+
+// Integer type aliases named after their intended widths.
+using int8 = signed char;
+using int16 = short;
+using int32 = int;
+using int64 = long long;
+
+using uint8 = unsigned char;
+using uint16 = unsigned short;
+using uint32 = unsigned int;
+using uint64 = unsigned long long;
+
+// Integer types built from the compiler's __PTRDIFF_TYPE__; without that
+// macro there is no fallback and compilation stops.
+#ifdef __PTRDIFF_TYPE__
+using intptr = __PTRDIFF_TYPE__;
+using uintptr = unsigned __PTRDIFF_TYPE__;
+#else
+# error "Can't find pointer-sized integral types."
+#endif
+
+namespace lul {
+
+class UniqueString;
+
+// Pointer formats used in exception handling frame descriptions, as
+// described by the Linux Standard Base Core Specification 4.0, section
+// 11.5, DWARF Extensions.
+enum DwarfPointerEncoding {
+  // Values of the lower nybble.
+  DW_EH_PE_absptr = 0x00,
+  DW_EH_PE_uleb128 = 0x01,
+  DW_EH_PE_udata2 = 0x02,
+  DW_EH_PE_udata4 = 0x03,
+  DW_EH_PE_udata8 = 0x04,
+
+  // The GNU toolchain sources define this enum value as well,
+  // simply to help classify the lower nybble values into signed and
+  // unsigned groups.
+  DW_EH_PE_signed = 0x08,
+
+  DW_EH_PE_sleb128 = 0x09,
+  DW_EH_PE_sdata2 = 0x0A,
+  DW_EH_PE_sdata4 = 0x0B,
+  DW_EH_PE_sdata8 = 0x0C,
+
+  // Values of the upper nybble.
+  DW_EH_PE_pcrel = 0x10,
+  DW_EH_PE_textrel = 0x20,
+  DW_EH_PE_datarel = 0x30,
+  DW_EH_PE_funcrel = 0x40,
+  DW_EH_PE_aligned = 0x50,
+
+  // This is not documented in LSB 4.0, but it is used in both the
+  // Linux and OS X toolchains.  It can be added to any other
+  // encoding (except DW_EH_PE_aligned), and indicates that the
+  // encoded value represents the address at which the true address
+  // is stored, not the true address itself.
+  DW_EH_PE_indirect = 0x80,
+
+  DW_EH_PE_omit = 0xff
+};
+
+// Byte order of multi-byte values.  The obvious names, LITTLE_ENDIAN and
+// BIG_ENDIAN, cannot be used because they conflict with macros.
+enum Endianness {
+  ENDIANNESS_BIG,
+  ENDIANNESS_LITTLE
+};
+
+// A ByteReader knows how to read single- and multi-byte values of
+// various endiannesses, sizes, and encodings, as used in DWARF
+// debugging information and Linux C++ exception handling data.
+//
+// None of the Read* functions is told where the buffer ends; callers
+// must make sure the bytes being read are actually there.
+class ByteReader {
+ public:
+ // Construct a ByteReader capable of reading one-, two-, four-, and
+ // eight-byte values according to ENDIANNESS, absolute machine-sized
+ // addresses, DWARF-style "initial length" values, signed and
+ // unsigned LEB128 numbers, and Linux C++ exception handling data's
+ // encoded pointers.
+ explicit ByteReader(enum Endianness endianness);
+ virtual ~ByteReader();
+
+ // Read a single byte from BUFFER and return it as an unsigned 8 bit
+ // number.
+ uint8 ReadOneByte(const char* buffer) const;
+
+ // Read two bytes from BUFFER and return them as an unsigned 16 bit
+ // number, using this ByteReader's endianness.
+ uint16 ReadTwoBytes(const char* buffer) const;
+
+ // Read four bytes from BUFFER and return them as an unsigned 32 bit
+ // number, using this ByteReader's endianness. This function returns
+ // a uint64 so that it is compatible with ReadAddress and
+ // ReadOffset. The number it returns will never be outside the range
+ // of an unsigned 32 bit integer.
+ uint64 ReadFourBytes(const char* buffer) const;
+
+ // Read eight bytes from BUFFER and return them as an unsigned 64
+ // bit number, using this ByteReader's endianness.
+ uint64 ReadEightBytes(const char* buffer) const;
+
+ // Read an unsigned LEB128 (Little Endian Base 128) number from
+ // BUFFER and return it as an unsigned 64 bit integer. Set *LEN to
+ // the number of bytes read.
+ //
+ // The unsigned LEB128 representation of an integer N is a variable
+ // number of bytes:
+ //
+ // - If N is between 0 and 0x7f, then its unsigned LEB128
+ // representation is a single byte whose value is N.
+ //
+ // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+ // 0x80, followed by the unsigned LEB128 representation of N /
+ // 128, rounded towards negative infinity.
+ //
+ // In other words, we break VALUE into groups of seven bits, put
+ // them in little-endian order, and then write them as eight-bit
+ // bytes with the high bit on all but the last.
+ uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
+
+ // Read a signed LEB128 number from BUFFER and return it as a
+ // signed 64 bit integer. Set *LEN to the number of bytes read.
+ //
+ // The signed LEB128 representation of an integer N is a variable
+ // number of bytes:
+ //
+ // - If N is between -0x40 and 0x3f, then its signed LEB128
+ // representation is a single byte whose value is N in two's
+ // complement.
+ //
+ // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+ // 0x80, followed by the signed LEB128 representation of N / 128,
+ // rounded towards negative infinity.
+ //
+ // In other words, we break VALUE into groups of seven bits, put
+ // them in little-endian order, and then write them as eight-bit
+ // bytes with the high bit on all but the last.
+ int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
+
+ // Indicate that addresses on this architecture are SIZE bytes long. SIZE
+ // must be either 4 or 8. (DWARF allows addresses to be any number of
+ // bytes in length from 1 to 255, but we only support 32- and 64-bit
+ // addresses at the moment.) You must call this before using the
+ // ReadAddress member function.
+ //
+ // For data in a .debug_info section, or something that .debug_info
+ // refers to like line number or macro data, the compilation unit
+ // header's address_size field indicates the address size to use. Call
+ // frame information doesn't indicate its address size (a shortcoming of
+ // the spec); you must supply the appropriate size based on the
+ // architecture of the target machine.
+ void SetAddressSize(uint8 size);
+
+ // Return the current address size, in bytes. This is either 4,
+ // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
+ uint8 AddressSize() const { return address_size_; }
+
+ // Read an address from BUFFER and return it as an unsigned 64 bit
+ // integer, respecting this ByteReader's endianness and address size. You
+ // must call SetAddressSize before calling this function.
+ uint64 ReadAddress(const char* buffer) const;
+
+ // DWARF actually defines two slightly different formats: 32-bit DWARF
+ // and 64-bit DWARF. This is *not* related to the size of registers or
+ // addresses on the target machine; it refers only to the size of section
+ // offsets and data lengths appearing in the DWARF data. One only needs
+ // 64-bit DWARF when the debugging data itself is larger than 4GiB.
+ // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
+ // debugging data itself is very large.
+ //
+ // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
+ // compilation unit and call frame information entry begins with an
+ // "initial length" field, which, in addition to giving the length of the
+ // data, also indicates the size of section offsets and lengths appearing
+ // in that data. The ReadInitialLength member function, below, reads an
+ // initial length and sets the ByteReader's offset size as a side effect.
+ // Thus, in the normal process of reading DWARF data, the appropriate
+ // offset size is set automatically. So, you should only need to call
+ // SetOffsetSize if you are using the same ByteReader to jump from the
+ // midst of one block of DWARF data into another.
+
+ // Read a DWARF "initial length" field from START, and return it as
+ // an unsigned 64 bit integer, respecting this ByteReader's
+ // endianness. Set *LEN to the length of the initial length in
+ // bytes, either four or twelve. As a side effect, set this
+ // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
+ // initial length) or 8 (if we see a 64-bit DWARF initial length).
+ //
+ // A DWARF initial length is either:
+ //
+ // - a byte count stored as an unsigned 32-bit value less than
+ // 0xffffff00, indicating that the data whose length is being
+ // measured uses the 32-bit DWARF format, or
+ //
+ // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
+ // indicating that the data whose length is being measured uses
+ // the 64-bit DWARF format.
+ uint64 ReadInitialLength(const char* start, size_t* len);
+
+ // Read an offset from BUFFER and return it as an unsigned 64 bit
+ // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
+ // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
+ // long. You must call ReadInitialLength or SetOffsetSize before calling
+ // this function; see the comments above for details.
+ uint64 ReadOffset(const char* buffer) const;
+
+ // Return the current offset size, in bytes.
+ // A return value of 4 indicates that we are reading 32-bit DWARF.
+ // A return value of 8 indicates that we are reading 64-bit DWARF.
+ uint8 OffsetSize() const { return offset_size_; }
+
+ // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+ // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+ // Usually, you should not call this function yourself; instead, let a
+ // call to ReadInitialLength establish the data's offset size
+ // automatically.
+ void SetOffsetSize(uint8 size);
+
+ // The Linux C++ ABI uses a variant of DWARF call frame information
+ // for exception handling. This data is included in the program's
+ // address space as the ".eh_frame" section, and interpreted at
+ // runtime to walk the stack, find exception handlers, and run
+ // cleanup code. The format is mostly the same as DWARF CFI, with
+ // some adjustments made to provide the additional
+ // exception-handling data, and to make the data easier to work with
+ // in memory --- for example, to allow it to be placed in read-only
+ // memory even when describing position-independent code.
+ //
+ // In particular, exception handling data can select a number of
+ // different encodings for pointers that appear in the data, as
+ // described by the DwarfPointerEncoding enum. There are actually
+ // four axes(!) to the encoding:
+ //
+ // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
+ // the DWARF LEB128 encoding.
+ //
+ // - The pointer's signedness: pointers can be signed or unsigned.
+ //
+ // - The pointer's base address: the data stored in the exception
+ // handling data can be the actual address (that is, an absolute
+ // pointer), or relative to one of a number of different base
+ // addresses --- including that of the encoded pointer itself, for
+ // a form of "pc-relative" addressing.
+ //
+ // - The pointer may be indirect: it may be the address where the
+ // true pointer is stored. (This is used to refer to things via
+ // global offset table entries, program linkage table entries, or
+ // other tricks used in position-independent code.)
+ //
+ // There are also two options that fall outside that matrix
+ // altogether: the pointer may be omitted, or it may have padding to
+ // align it on an appropriate address boundary. (That last option
+ // may seem like it should be just another axis, but it is not.)
+
+ // Indicate that the exception handling data is loaded starting at
+ // SECTION_BASE, and that the start of its buffer in our own memory
+ // is BUFFER_BASE. This allows us to find the address that a given
+ // byte in our buffer would have when loaded into the program the
+ // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
+ void SetCFIDataBase(uint64 section_base, const char* buffer_base);
+
+ // Indicate that the base address of the program's ".text" section
+ // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
+ void SetTextBase(uint64 text_base);
+
+ // Indicate that the base address for DW_EH_PE_datarel pointers is
+ // DATA_BASE. The proper value depends on the ABI; it is usually the
+ // address of the global offset table, held in a designated register in
+ // position-independent code. You will need to look at the startup code
+ // for the target system to be sure. I tried; my eyes bled.
+ void SetDataBase(uint64 data_base);
+
+ // Indicate that the base address for the FDE we are processing is
+ // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
+ // pointers. (This encoding does not seem to be used by the GNU
+ // toolchain.)
+ void SetFunctionBase(uint64 function_base);
+
+ // Indicate that we are no longer processing any FDE, so any use of
+ // a DW_EH_PE_funcrel encoding is an error.
+ void ClearFunctionBase();
+
+ // Return true if ENCODING is a valid pointer encoding.
+ bool ValidEncoding(DwarfPointerEncoding encoding) const;
+
+ // Return true if we have all the information we need to read a
+ // pointer that uses ENCODING. This checks that the appropriate
+ // SetFooBase function for ENCODING has been called.
+ bool UsableEncoding(DwarfPointerEncoding encoding) const;
+
+ // Read an encoded pointer from BUFFER using ENCODING; return the
+ // absolute address it represents, and set *LEN to the pointer's
+ // length in bytes, including any padding for aligned pointers.
+ //
+ // This function calls 'abort' if ENCODING is invalid or refers to a
+ // base address this reader hasn't been given, so you should check
+ // with ValidEncoding and UsableEncoding first if you would rather
+ // die in a more helpful way.
+ uint64 ReadEncodedPointer(const char* buffer, DwarfPointerEncoding encoding,
+ size_t* len) const;
+
+ private:
+ // Function pointer type for our address and offset readers.
+ typedef uint64 (ByteReader::*AddressReader)(const char*) const;
+
+ // Read an offset from BUFFER and return it as an unsigned 64 bit
+ // integer. DWARF2/3 define offsets as either 4 or 8 bytes,
+ // generally depending on the amount of DWARF2/3 info present.
+ // This function pointer gets set by SetOffsetSize.
+ AddressReader offset_reader_;
+
+ // Read an address from BUFFER and return it as an unsigned 64 bit
+ // integer. DWARF2/3 allow addresses to be any size from 0-255
+ // bytes currently. Internally we support 4 and 8 byte addresses,
+ // and will CHECK on anything else.
+ // This function pointer gets set by SetAddressSize.
+ AddressReader address_reader_;
+
+ // Byte order used by the multi-byte readers.
+ Endianness endian_;
+ // Sizes, in bytes, reported by AddressSize() and OffsetSize().
+ uint8 address_size_;
+ uint8 offset_size_;
+
+ // Base addresses for Linux C++ exception handling data's encoded pointers.
+ // Each have_*_ flag is set by the matching Set*Base function;
+ // have_function_base_ is cleared again by ClearFunctionBase.
+ bool have_section_base_, have_text_base_, have_data_base_;
+ bool have_function_base_;
+ uint64 section_base_;
+ uint64 text_base_, data_base_, function_base_;
+ const char* buffer_base_;
+};
+
+// Returns the byte at |buffer| as an unsigned 8 bit value.
+inline uint8 ByteReader::ReadOneByte(const char* buffer) const {
+  return *buffer;
+}
+
+// Returns the unsigned 16 bit value held in the two bytes at
+// |signed_buffer|, assembled according to this reader's byte order.
+inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const {
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+
+  // The low-order byte is the first one in little-endian data and the
+  // second one otherwise.
+  const uint16 low = little ? bytes[0] : bytes[1];
+  const uint16 high = little ? bytes[1] : bytes[0];
+  return low | high << 8;
+}
+
+// Returns the unsigned 32 bit value held in the four bytes at
+// |signed_buffer|, assembled according to this reader's byte order and
+// widened to uint64.
+inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const {
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+
+  // Fold the bytes in from most significant to least significant; which
+  // end of the buffer that starts from depends on the byte order.
+  uint32 value = 0;
+  if (endian_ == ENDIANNESS_LITTLE) {
+    for (int i = 3; i >= 0; i--) {
+      value = (value << 8) | bytes[i];
+    }
+  } else {
+    for (int i = 0; i < 4; i++) {
+      value = (value << 8) | bytes[i];
+    }
+  }
+  return value;
+}
+
+// Returns the unsigned 64 bit value held in the eight bytes at
+// |signed_buffer|, assembled according to this reader's byte order.
+inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const {
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+
+  // Fold the bytes in from most significant to least significant; which
+  // end of the buffer that starts from depends on the byte order.
+  uint64 value = 0;
+  if (endian_ == ENDIANNESS_LITTLE) {
+    for (int i = 7; i >= 0; i--) {
+      value = (value << 8) | bytes[i];
+    }
+  } else {
+    for (int i = 0; i < 8; i++) {
+      value = (value << 8) | bytes[i];
+    }
+  }
+  return value;
+}
+
+// Read an unsigned LEB128 number.  Each byte contains 7 bits of
+// information, plus one bit saying whether the number continues or
+// not.
+//
+// Returns the decoded value and sets *LEN to the number of bytes consumed,
+// including the final byte.  Groups that would land beyond bit 63 are
+// consumed and counted in *LEN but do not contribute to the result, so an
+// over-long encoding cannot cause an out-of-range shift.  The end of the
+// buffer is not known here: the caller must make sure a byte with its top
+// bit clear is present.
+
+inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer,
+                                             size_t* len) const {
+  uint64 result = 0;
+  size_t num_read = 0;
+  unsigned int shift = 0;
+  unsigned char byte;
+
+  do {
+    byte = *buffer++;
+    num_read++;
+
+    // Shifting a 64 bit value by 64 or more is undefined, so stop
+    // accumulating (and stop advancing |shift|) once the result is full.
+    if (shift < 8 * sizeof(result)) {
+      result |= (static_cast<uint64>(byte & 0x7f)) << shift;
+      shift += 7;
+    }
+
+  } while (byte & 0x80);
+
+  *len = num_read;
+
+  return result;
+}
+
+// Read a signed LEB128 number.  These are like regular LEB128
+// numbers, except the last byte may have a sign bit set.
+//
+// Returns the decoded value and sets *LEN to the number of bytes consumed,
+// including the final byte.  The value is assembled in an unsigned 64 bit
+// accumulator, so neither the shifts nor the sign extension can overflow a
+// signed type.  Groups that would land beyond bit 63 are consumed and
+// counted in *LEN but do not contribute to the result.  The end of the
+// buffer is not known here: the caller must make sure a byte with its top
+// bit clear is present.
+
+inline int64 ByteReader::ReadSignedLEB128(const char* buffer,
+                                          size_t* len) const {
+  uint64 bits = 0;
+  unsigned int shift = 0;
+  size_t num_read = 0;
+  unsigned char byte;
+
+  do {
+    byte = *buffer++;
+    num_read++;
+    // Shifting a 64 bit value by 64 or more is undefined, so stop
+    // accumulating (and stop advancing |shift|) once the result is full.
+    if (shift < 8 * sizeof(bits)) {
+      bits |= (static_cast<uint64>(byte & 0x7f) << shift);
+      shift += 7;
+    }
+  } while (byte & 0x80);
+
+  // Bit 6 of the final byte is the sign.  If it is set and there is room
+  // left above the decoded groups, fill that room with ones.
+  if ((shift < 8 * sizeof(bits)) && (byte & 0x40)) {
+    bits |= ~static_cast<uint64>(0) << shift;
+  }
+
+  *len = num_read;
+  return static_cast<int64>(bits);
+}
+
+// Reads an offset from |buffer| by calling through offset_reader_, which
+// must have been set beforehand (asserted).
+inline uint64 ByteReader::ReadOffset(const char* buffer) const {
+  MOZ_ASSERT(this->offset_reader_);
+  const AddressReader readOffset = this->offset_reader_;
+  return (this->*readOffset)(buffer);
+}
+
+// Reads an address from |buffer| by calling through address_reader_, which
+// must have been set beforehand (asserted).
+inline uint64 ByteReader::ReadAddress(const char* buffer) const {
+  MOZ_ASSERT(this->address_reader_);
+  const AddressReader readAddress = this->address_reader_;
+  return (this->*readAddress)(buffer);
+}
+
+// Records |section_base| and |buffer_base| and notes that the section
+// base is now known.
+inline void ByteReader::SetCFIDataBase(uint64 section_base,
+                                       const char* buffer_base) {
+  have_section_base_ = true;
+  buffer_base_ = buffer_base;
+  section_base_ = section_base;
+}
+
+// Records |text_base| and notes that the text base is now known.
+inline void ByteReader::SetTextBase(uint64 text_base) {
+  have_text_base_ = true;
+  text_base_ = text_base;
+}
+
+// Records |data_base| and notes that the data base is now known.
+inline void ByteReader::SetDataBase(uint64 data_base) {
+  have_data_base_ = true;
+  data_base_ = data_base;
+}
+
+// Records |function_base| and notes that the function base is now known.
+inline void ByteReader::SetFunctionBase(uint64 function_base) {
+  have_function_base_ = true;
+  function_base_ = function_base;
+}
+
+// Notes that no function base is known any more.  The stored
+// function_base_ value itself is left as it was.
+inline void ByteReader::ClearFunctionBase() {
+  have_function_base_ = false;
+}
+
+// (derived from)
+// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which
+// accepts parsed DWARF call frame info and adds it to a Summariser object.
+
+// This class is a reader for DWARF's Call Frame Information. CFI
+// describes how to unwind stack frames --- even for functions that do
+// not follow fixed conventions for saving registers, whose frame size
+// varies as they execute, etc.
+//
+// CFI describes, at each machine instruction, how to compute the
+// stack frame's base address, how to find the return address, and
+// where to find the saved values of the caller's registers (if the
+// callee has stashed them somewhere to free up the registers for its
+// own use).
+//
+// For example, suppose we have a function whose machine code looks
+// like this (imagine an assembly language that looks like C, for a
+// machine with 32-bit registers, and a stack that grows towards lower
+// addresses):
+//
+// func: ; entry point; return address at sp
+// func+0: sp = sp - 16 ; allocate space for stack frame
+// func+1: sp[12] = r0 ; save r0 at sp+12
+// ... ; other code, not frame-related
+// func+10: sp -= 4; *sp = x ; push some x on the stack
+// ... ; other code, not frame-related
+// func+20: r0 = sp[16] ; restore saved r0
+// func+21: sp += 20 ; pop whole stack frame
+// func+22: pc = *sp; sp += 4 ; pop return address and jump to it
+//
+// DWARF CFI is (a very compressed representation of) a table with a
+// row for each machine instruction address and a column for each
+// register showing how to restore it, if possible.
+//
+// A special column named "CFA", for "Canonical Frame Address", tells how
+// to compute the base address of the frame; registers' entries may
+// refer to the CFA in describing where the registers are saved.
+//
+// Another special column, named "RA", represents the return address.
+//
+// For example, here is a complete (uncompressed) table describing the
+// function above:
+//
+//     insn      cfa     r0       r1 ...   ra
+//     ==========================================
+//     func+0:   sp                        cfa[0]
+//     func+1:   sp+16                     cfa[0]
+//     func+2:   sp+16   cfa[-4]           cfa[0]
+//     func+11:  sp+20   cfa[-4]           cfa[0]
+//     func+21:  sp+20                     cfa[0]
+//     func+22:  sp                        cfa[0]
+//
+// Some things to note here:
+//
+// - Each row describes the state of affairs *before* executing the
+// instruction at the given address. Thus, the row for func+0
+// describes the state before we allocate the stack frame. In the
+// next row, the formula for computing the CFA has changed,
+// reflecting that allocation.
+//
+// - The other entries are written in terms of the CFA; this allows
+// them to remain unchanged as the stack pointer gets bumped around.
+// For example, the rule for recovering the return address (the "ra"
+// column) remains unchanged throughout the function, even as the
+// stack pointer takes on three different offsets from the return
+// address.
+//
+// - Although we haven't shown it, most calling conventions designate
+// "callee-saves" and "caller-saves" registers. The callee must
+// preserve the values of callee-saves registers; if it uses them,
+// it must save their original values somewhere, and restore them
+// before it returns. In contrast, the callee is free to trash
+// caller-saves registers; if the callee uses these, it will
+// probably not bother to save them anywhere, and the CFI will
+// probably mark their values as "unrecoverable".
+//
+// (However, since the caller cannot assume the callee was going to
+// save them, caller-saves registers are probably dead in the caller
+// anyway, so compilers usually don't generate CFI for caller-saves
+// registers.)
+//
+// - Exactly where the CFA points is a matter of convention that
+// depends on the architecture and ABI in use. In the example, the
+// CFA is the value the stack pointer had upon entry to the
+// function, pointing at the saved return address. But on the x86,
+// the call frame information generated by GCC follows the
+// convention that the CFA is the address *after* the saved return
+// address.
+//
+// But by definition, the CFA remains constant throughout the
+// lifetime of the frame. This makes it a useful value for other
+// columns to refer to. It also gives debuggers a useful handle
+// for identifying a frame.
+//
+// If you look at the table above, you'll notice that a given entry is
+// often the same as the one immediately above it: most instructions
+// change only one or two aspects of the stack frame, if they affect
+// it at all. The DWARF format takes advantage of this fact, and
+// reduces the size of the data by mentioning only the addresses and
+// columns at which changes take place. So for the above, DWARF CFI
+// data would only actually mention the following:
+//
+//     insn      cfa      r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                       cfa[0]
+//     func+1:   sp+16
+//     func+2:            cfa[-4]
+//     func+11:  sp+20
+//     func+21:           r0
+//     func+22:  sp
+//
+// In fact, this is the way the parser reports CFI to the consumer: as
+// a series of statements of the form, "At address X, column Y changed
+// to Z," and related conventions for describing the initial state.
+//
+// Naturally, it would be impractical to have to scan the entire
+// program's CFI, noting changes as we go, just to recover the
+// unwinding rules in effect at one particular instruction. To avoid
+// this, CFI data is grouped into "entries", each of which covers a
+// specified range of addresses and begins with a complete statement
+// of the rules for all recoverable registers at that starting
+// address. Each entry typically covers a single function.
+//
+// Thus, to compute the contents of a given row of the table --- that
+// is, rules for recovering the CFA, RA, and registers at a given
+// instruction --- the consumer should find the entry that covers that
+// instruction's address, start with the initial state supplied at the
+// beginning of the entry, and work forward until it has processed all
+// the changes up to and including those for the present instruction.
+//
+// There are seven kinds of rules that can appear in an entry of the
+// table:
+//
+// - "undefined": The given register is not preserved by the callee;
+// its value cannot be recovered.
+//
+// - "same value": This register has the same value it did in the callee.
+//
+// - offset(N): The register is saved at offset N from the CFA.
+//
+// - val_offset(N): The value the register had in the caller is the
+// CFA plus offset N. (This is usually only useful for describing
+// the stack pointer.)
+//
+// - register(R): The register's value was saved in another register R.
+//
+// - expression(E): Evaluating the DWARF expression E using the
+// current frame's registers' values yields the address at which the
+// register was saved.
+//
+// - val_expression(E): Evaluating the DWARF expression E using the
+// current frame's registers' values yields the value the register
+// had in the caller.
+
+class CallFrameInfo {
+ public:
+ // The different kinds of entries one finds in CFI. Used internally,
+ // and for error reporting.
+ enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
+
+ // The handler class to which the parser hands the parsed call frame
+ // information. Defined below.
+ class Handler;
+
+ // A reporter class, which CallFrameInfo uses to report errors
+ // encountered while parsing call frame information. Defined below.
+ class Reporter;
+
+ // Create a DWARF CFI parser. BUFFER points to the contents of the
+ // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+ // REPORTER is an error reporter the parser should use to report
+ // problems. READER is a ByteReader instance that has the endianness and
+ // address size set properly. Report the data we find to HANDLER.
+ //
+ // This class can also parse Linux C++ exception handling data, as found
+ // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+ // placed in loadable segments so that it is present in the program's
+ // address space, and is interpreted by the C++ runtime to search the
+ // call stack for a handler interested in the exception being thrown,
+ // actually pop the frames, and find cleanup code to run.
+ //
+ // There are two differences between the call frame information described
+ // in the DWARF standard and the exception handling data Linux places in
+ // the .eh_frame section:
+ //
+ // - Exception handling data uses a different format for call frame
+ // information entry headers. The distinguished CIE id, the way FDEs
+ // refer to their CIEs, and the way the end of the series of entries is
+ // determined are all slightly different.
+ //
+ // If the constructor's EH_FRAME argument is true, then the
+ // CallFrameInfo parses the entry headers as Linux C++ exception
+ // handling data. If EH_FRAME is false or omitted, the CallFrameInfo
+ // parses standard DWARF call frame information.
+ //
+ // - Linux C++ exception handling data uses CIE augmentation strings
+ // beginning with 'z' to specify the presence of additional data after
+ // the CIE and FDE headers and special encodings used for addresses in
+ // frame description entries.
+ //
+ // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
+ // exception handling data if you have supplied READER with the base
+ // addresses needed to interpret the pointer encodings that 'z'
+ // augmentations can specify. See the ByteReader interface for details
+ // about the base addresses. See the CallFrameInfo::Handler interface
+ // for details about the additional information one might find in
+ // 'z'-augmented data.
+ //
+ // Thus:
+ //
+ // - If you are parsing standard DWARF CFI, as found in a .debug_frame
+ // section, you should pass false for the EH_FRAME argument, or omit
+ // it, and you need not worry about providing READER with the
+ // additional base addresses.
+ //
+ // - If you want to parse Linux C++ exception handling data from a
+ // .eh_frame section, you should pass EH_FRAME as true, and call
+ // READER's Set*Base member functions before calling our Start method.
+ //
+ // - If you want to parse DWARF CFI that uses the 'z' augmentations
+ // (although I don't think any toolchain ever emits such data), you
+ // could pass false for EH_FRAME, but call READER's Set*Base members.
+ //
+ // The extensions the Linux C++ ABI makes to DWARF for exception
+ // handling are described here, rather poorly:
+ // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+ // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+ //
+ // The mechanics of C++ exception handling, personality routines,
+ // and language-specific data areas are described here, rather nicely:
+ // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
+
+ CallFrameInfo(const char* buffer, size_t buffer_length, ByteReader* reader,
+ Handler* handler, Reporter* reporter, bool eh_frame = false)
+ : buffer_(buffer),
+ buffer_length_(buffer_length),
+ reader_(reader),
+ handler_(handler),
+ reporter_(reporter),
+ eh_frame_(eh_frame) {}
+
+ ~CallFrameInfo() {}
+
+ // Parse the entries in BUFFER, reporting what we find to HANDLER.
+ // Return true if we reach the end of the section successfully, or
+ // false if we encounter an error.
+ bool Start();
+
+ // Return the textual name of KIND. For error reporting.
+ static const char* KindName(EntryKind kind);
+
+ private:
+ struct CIE;
+
+ // A CFI entry, either an FDE or a CIE.
+ struct Entry {
+ // The starting offset of the entry in the section, for error
+ // reporting.
+ size_t offset;
+
+ // The start of this entry in the buffer.
+ const char* start;
+
+ // Which kind of entry this is.
+ //
+ // We want to be able to use this for error reporting even while we're
+ // in the midst of parsing. Error reporting code may assume that kind,
+ // offset, and start fields are valid, although kind may be kUnknown.
+ EntryKind kind;
+
+ // The end of this entry's common prologue (initial length and id), and
+ // the start of this entry's kind-specific fields.
+ const char* fields;
+
+ // The start of this entry's instructions.
+ const char* instructions;
+
+ // The address past the entry's last byte in the buffer. (Note that
+ // since offset points to the entry's initial length field, and the
+ // length field is the number of bytes after that field, this is not
+ // simply buffer_ + offset + length.)
+ const char* end;
+
+ // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
+ // CIE, and the offset of the associated CIE in an FDE.
+ uint64 id;
+
+ // The CIE that applies to this entry, if we've parsed it. If this is a
+ // CIE, then this field points to this structure.
+ CIE* cie;
+ };
+
+ // A common information entry (CIE).
+ struct CIE : public Entry {
+ uint8 version; // CFI data version number
+ std::string augmentation; // vendor format extension markers
+ uint64 code_alignment_factor; // scale for code address adjustments
+ int data_alignment_factor; // scale for stack pointer adjustments
+ unsigned return_address_register; // which register holds the return addr
+
+ // True if this CIE includes Linux C++ ABI 'z' augmentation data.
+ bool has_z_augmentation;
+
+ // Parsed 'z' augmentation data. These are meaningful only if
+ // has_z_augmentation is true.
+ bool has_z_lsda; // The 'z' augmentation included 'L'.
+ bool has_z_personality; // The 'z' augmentation included 'P'.
+ bool has_z_signal_frame; // The 'z' augmentation included 'S'.
+
+ // If has_z_lsda is true, this is the encoding to be used for language-
+ // specific data area pointers in FDEs.
+ DwarfPointerEncoding lsda_encoding;
+
+ // If has_z_personality is true, this is the encoding used for the
+ // personality routine pointer in the augmentation data.
+ DwarfPointerEncoding personality_encoding;
+
+ // If has_z_personality is true, this is the address of the personality
+ // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
+ // address where the personality routine's address is stored.
+ uint64 personality_address;
+
+ // This is the encoding used for addresses in the FDE header and
+ // in DW_CFA_set_loc instructions. This is always valid, whether
+ // or not we saw a 'z' augmentation string; its default value is
+ // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
+ DwarfPointerEncoding pointer_encoding;
+ };
+
+ // A frame description entry (FDE).
+ struct FDE : public Entry {
+ uint64 address; // start address of described code
+ uint64 size; // size of described code, in bytes
+
+ // If cie->has_z_lsda is true, then this is the language-specific data
+ // area's address --- or its address's address, if cie->lsda_encoding
+ // has the DW_EH_PE_indirect bit set.
+ uint64 lsda_address;
+ };
+
+ // Internal use. These classes are only forward-declared here.
+ class Rule;
+ class UndefinedRule;
+ class SameValueRule;
+ class OffsetRule;
+ class ValOffsetRule;
+ class RegisterRule;
+ class ExpressionRule;
+ class ValExpressionRule;
+ class RuleMap;
+ class State;
+
+ // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
+ // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
+ // data to parse. On success, populate ENTRY as appropriate, and return
+ // true. On failure, report the problem, and return false. Even if we
+ // return false, set ENTRY->end to the first byte after the entry if we
+ // were able to figure that out, or NULL if we weren't.
+ bool ReadEntryPrologue(const char* cursor, Entry* entry);
+
+ // Parse the fields of a CIE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of CIE are
+ // populated; use CIE->fields and CIE->end as the start and limit for
+ // parsing. On success, populate the rest of *CIE, and return true; on
+ // failure, report the problem and return false.
+ bool ReadCIEFields(CIE* cie);
+
+ // Parse the fields of an FDE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of *FDE are
+ // initialized; use FDE->fields and FDE->end as the start and limit for
+ // parsing. Assume that FDE->cie is fully initialized. On success,
+ // populate the rest of *FDE, and return true; on failure, report the
+ // problem and return false.
+ bool ReadFDEFields(FDE* fde);
+
+ // Report that ENTRY is incomplete, and return false. This is just a
+ // trivial wrapper for invoking reporter_->Incomplete; it provides a
+ // little brevity.
+ bool ReportIncomplete(Entry* entry);
+
+ // Return true if ENCODING has the DW_EH_PE_indirect bit set.
+ static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+ return encoding & DW_EH_PE_indirect;
+ }
+
+ // The contents of the .debug_frame (or .eh_frame) section we're parsing,
+ const char* buffer_;
+ size_t buffer_length_; // and the length of that section, in bytes.
+
+ // For reading multi-byte values with the appropriate endianness.
+ ByteReader* reader_;
+
+ // The handler to which we should report the data we find.
+ Handler* handler_;
+
+ // For reporting problems in the info we're parsing.
+ Reporter* reporter_;
+
+ // True if we are processing .eh_frame-format data.
+ bool eh_frame_;
+};
+
+// The handler class for CallFrameInfo. The CFI parser calls the
+// member functions of a handler object to report the data it finds.
+class CallFrameInfo::Handler {
+ public:
+ // The pseudo-register number for the canonical frame address.
+ enum { kCFARegister = DW_REG_CFA };
+
+ Handler() {}
+ virtual ~Handler() {}
+
+ // The parser has found CFI for the machine code at ADDRESS,
+ // extending for LENGTH bytes. OFFSET is the offset of the frame
+ // description entry in the section, for use in error messages.
+ // VERSION is the version number of the CFI format. AUGMENTATION is
+ // a string describing any producer-specific extensions present in
+ // the data. RETURN_ADDRESS is the number of the register that holds
+ // the address to which the function should return.
+ //
+ // Entry should return true to process this CFI, or false to skip to
+ // the next entry.
+ //
+ // The parser invokes Entry for each Frame Description Entry (FDE)
+ // it finds. The parser doesn't report Common Information Entries
+ // to the handler explicitly; instead, if the handler elects to
+ // process a given FDE, the parser reiterates the appropriate CIE's
+ // contents at the beginning of the FDE's rules.
+ virtual bool Entry(size_t offset, uint64 address, uint64 length,
+ uint8 version, const std::string& augmentation,
+ unsigned return_address) = 0;
+
+ // When the Entry function returns true, the parser calls these
+ // handler functions repeatedly to describe the rules for recovering
+ // registers at each instruction in the given range of machine code.
+ // Immediately after a call to Entry, the handler should assume that
+ // the rule for each callee-saves register is "unchanged" --- that
+ // is, that the register still has the value it had in the caller.
+ //
+ // If a *Rule function returns true, we continue processing this entry's
+ // instructions. If a *Rule function returns false, we stop evaluating
+ // instructions, and skip to the next entry. Either way, we call End
+ // before going on to the next entry.
+ //
+ // In all of these functions, if the REG parameter is kCFARegister, then
+ // the rule describes how to find the canonical frame address.
+ // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
+ // the canonical frame address should be used as the base address for the
+ // computation. All other REG values will be positive.
+
+ // At ADDRESS, register REG's value is not recoverable.
+ virtual bool UndefinedRule(uint64 address, int reg) = 0;
+
+ // At ADDRESS, register REG's value is the same as that it had in
+ // the caller.
+ virtual bool SameValueRule(uint64 address, int reg) = 0;
+
+ // At ADDRESS, register REG has been saved at offset OFFSET from
+ // BASE_REGISTER.
+ virtual bool OffsetRule(uint64 address, int reg, int base_register,
+ long offset) = 0;
+
+ // At ADDRESS, the caller's value of register REG is the current
+ // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
+ // address at which the register's value is saved.)
+ virtual bool ValOffsetRule(uint64 address, int reg, int base_register,
+ long offset) = 0;
+
+ // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
+ // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
+ // BASE_REGISTER is the "home" for REG's saved value: if you want to
+ // assign to a variable whose home is REG in the calling frame, you
+ // should put the value in BASE_REGISTER.
+ virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0;
+
+ // At ADDRESS, the DWARF expression EXPRESSION yields the address at
+ // which REG was saved.
+ virtual bool ExpressionRule(uint64 address, int reg,
+ const std::string& expression) = 0;
+
+ // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
+ // value for REG. (This rule doesn't provide an address at which the
+ // register's value is saved.)
+ virtual bool ValExpressionRule(uint64 address, int reg,
+ const std::string& expression) = 0;
+
+ // Indicate that the rules for the address range reported by the
+ // last call to Entry are complete. End should return true if
+ // everything is okay, or false if an error has occurred and parsing
+ // should stop.
+ virtual bool End() = 0;
+
+ // Handler functions for Linux C++ exception handling data. These are
+ // only called if the data includes 'z' augmentation strings.
+
+ // The Linux C++ ABI uses an extension of the DWARF CFI format to
+ // walk the stack to propagate exceptions from the throw to the
+ // appropriate catch, and do the appropriate cleanups along the way.
+ // CFI entries used for exception handling have two additional pieces
+ // of data associated with them:
+ //
+ // - The "language-specific data area" describes which exception
+ // types the function has 'catch' clauses for, and indicates how
+ // to go about re-entering the function at the appropriate catch
+ // clause. If the exception is not caught, it describes the
+ // destructors that must run before the frame is popped.
+ //
+ // - The "personality routine" is responsible for interpreting the
+ // language-specific data area's contents, and deciding whether
+ // the exception should continue to propagate down the stack,
+ // perhaps after doing some cleanup for this frame, or whether the
+ // exception will be caught here.
+ //
+ // In principle, the language-specific data area is opaque to
+ // everybody but the personality routine. In practice, these values
+ // may be useful or interesting to readers with extra context, and
+ // we have to at least skip them anyway, so we might as well report
+ // them to the handler.
+
+ // This entry's exception handling personality routine's address is
+ // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+ // which the routine's address is stored. The default definition for
+ // this handler function simply returns true, allowing parsing of
+ // the entry to continue.
+ virtual bool PersonalityRoutine(uint64 address, bool indirect) {
+ return true;
+ }
+
+ // This entry's language-specific data area (LSDA) is located at
+ // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+ // which the area's address is stored. The default definition for
+ // this handler function simply returns true, allowing parsing of
+ // the entry to continue.
+ virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) {
+ return true;
+ }
+
+ // This entry describes a signal trampoline --- this frame is the
+ // caller of a signal handler. The default definition for this
+ // handler function simply returns true, allowing parsing of the
+ // entry to continue.
+ //
+ // The best description of the rationale for and meaning of signal
+ // trampoline CFI entries seems to be in the GCC bug database:
+ // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
+ virtual bool SignalHandler() { return true; }
+};
+
+// The CallFrameInfo class makes calls on an instance of this class to
+// report errors or warn about problems in the data it is parsing.
+// These messages are sent to the message sink |aLog| provided to the
+// constructor.
+class CallFrameInfo::Reporter {
+ public:
+ // Create an error reporter which attributes troubles to the section
+ // named SECTION in FILENAME.
+ //
+ // Normally SECTION would be .debug_frame, but the Mac puts CFI data
+ // in a Mach-O section named __debug_frame. If we support
+ // Linux-style exception handling data, we could be reading an
+ // .eh_frame section.
+ Reporter(void (*aLog)(const char*), const std::string& filename,
+ const std::string& section = ".debug_frame")
+ : log_(aLog), filename_(filename), section_(section) {}
+ virtual ~Reporter() {}
+
+ // The CFI entry at OFFSET ends too early to be well-formed. KIND
+ // indicates what kind of entry it is; KIND can be kUnknown if we
+ // haven't parsed enough of the entry to tell yet.
+ virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind);
+
+ // The .eh_frame data has a four-byte zero at OFFSET where the next
+ // entry's length would be; this is a terminator. However, the buffer
+ // length as given to the CallFrameInfo constructor says there should be
+ // more data.
+ virtual void EarlyEHTerminator(uint64 offset);
+
+ // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
+ // section is not that large.
+ virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset);
+
+ // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
+ // there is not a CIE.
+ virtual void BadCIEId(uint64 offset, uint64 cie_offset);
+
+ // The FDE at OFFSET refers to a CIE with version number VERSION,
+ // which we don't recognize. We cannot parse DWARF CFI if it uses
+ // a version number we don't recognize.
+ virtual void UnrecognizedVersion(uint64 offset, int version);
+
+ // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
+ // which we don't recognize. We cannot parse DWARF CFI if it uses
+ // augmentations we don't recognize.
+ virtual void UnrecognizedAugmentation(uint64 offset,
+ const std::string& augmentation);
+
+ // The FDE at OFFSET contains an invalid or otherwise unusable Dwarf4
+ // specific field (currently, only "address_size" or "segment_size").
+ // Parsing DWARF CFI with unexpected values here seems dubious at best,
+ // so we stop. WHAT gives a little more information about what is wrong.
+ virtual void InvalidDwarf4Artefact(uint64 offset, const char* what);
+
+ // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
+ // a valid encoding.
+ virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding);
+
+ // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
+ // on a base address which has not been supplied.
+ virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding);
+
+ // The CIE at OFFSET contains a DW_CFA_restore instruction at
+ // INSN_OFFSET, which may not appear in a CIE.
+ virtual void RestoreInCIE(uint64 offset, uint64 insn_offset);
+
+ // The entry at OFFSET, of kind KIND, has an unrecognized
+ // instruction at INSN_OFFSET.
+ virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind,
+ uint64 insn_offset);
+
+ // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+ // KIND, establishes a rule that cites the CFA, but we have not
+ // established a CFA rule yet.
+ virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+ uint64 insn_offset);
+
+ // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+ // KIND, is a DW_CFA_restore_state instruction, but the stack of
+ // saved states is empty.
+ virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind,
+ uint64 insn_offset);
+
+ // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry
+ // at OFFSET, of kind KIND, would restore a state that has no CFA
+ // rule, whereas the current state does have a CFA rule. This is
+ // bogus input, which the CallFrameInfo::Handler interface doesn't
+ // (and shouldn't) have any way to report.
+ virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+ uint64 insn_offset);
+
+ private:
+ // A logging sink function, as supplied by LUL's user.
+ void (*log_)(const char*);
+
+ protected:
+ // The name of the file whose CFI we're reading.
+ std::string filename_;
+
+ // The name of the CFI section in that file.
+ std::string section_;
+};
+
+using lul::CallFrameInfo;
+using lul::Summariser;
+
+// A class that accepts parsed call frame information from the DWARF
+// CFI parser and hands the unwinding rules it receives to a
+// Summariser.
+class DwarfCFIToModule : public CallFrameInfo::Handler {
+ public:
+ // DwarfCFIToModule uses an instance of this class to report errors
+ // detected while converting DWARF CFI to Breakpad STACK CFI records.
+ class Reporter {
+ public:
+ // Create a reporter that writes messages to the message sink
+ // |aLog|. FILE is the name of the file we're processing, and
+ // SECTION is the name of the section within that file that we're
+ // looking at (.debug_frame, .eh_frame, etc.).
+ Reporter(void (*aLog)(const char*), const std::string& file,
+ const std::string& section)
+ : log_(aLog), file_(file), section_(section) {}
+ virtual ~Reporter() {}
+
+ // The DWARF CFI entry at OFFSET says that REG is undefined, but the
+ // Breakpad symbol file format cannot express this.
+ virtual void UndefinedNotSupported(size_t offset, const UniqueString* reg);
+
+ // The DWARF CFI entry at OFFSET says that REG uses a DWARF
+ // expression to find its value, but parseDwarfExpr could not
+ // convert it to a sequence of PfxInstrs.
+ virtual void ExpressionCouldNotBeSummarised(size_t offset,
+ const UniqueString* reg);
+
+ private:
+ // A logging sink function, as supplied by LUL's user.
+ void (*log_)(const char*);
+
+ protected:
+ std::string file_, section_;
+ };
+
+ // Per-architecture register information. NOTE(review): each function
+ // returns an unsigned int, not a name table; presumably the number of
+ // DWARF-defined registers (cf. num_dw_regs_) -- confirm at the definitions.
+ class RegisterNames {
+ public:
+ // Intel's "x86" or IA-32.
+ static unsigned int I386();
+
+ // AMD x86_64, AMD64, Intel EM64T, or Intel 64
+ static unsigned int X86_64();
+
+ // ARM.
+ static unsigned int ARM();
+
+ // AARCH64.
+ static unsigned int ARM64();
+
+ // MIPS.
+ static unsigned int MIPS();
+ };
+
+ // Create a handler for the lul::CallFrameInfo parser that records the
+ // stack unwinding information it receives in SUMM.
+ //
+ // NUM_DW_REGS is the number of DWARF-defined register names for the
+ // target architecture. READER is used for parsing DWARF expressions,
+ // and USU is the universe in which any needed UniqueStrings are created.
+ //
+ // Use REPORTER for reporting problems encountered in the conversion process.
+ DwarfCFIToModule(const unsigned int num_dw_regs, Reporter* reporter,
+ ByteReader* reader,
+ /*MOD*/ UniqueStringUniverse* usu,
+ /*OUT*/ Summariser* summ)
+ : summ_(summ),
+ usu_(usu),
+ num_dw_regs_(num_dw_regs),
+ reporter_(reporter),
+ reader_(reader),
+ entry_offset_(0), // start from a defined value
+ return_address_(-1) {}
+ virtual ~DwarfCFIToModule() {}
+
+ virtual bool Entry(size_t offset, uint64 address, uint64 length,
+ uint8 version, const std::string& augmentation,
+ unsigned return_address) override;
+ virtual bool UndefinedRule(uint64 address, int reg) override;
+ virtual bool SameValueRule(uint64 address, int reg) override;
+ virtual bool OffsetRule(uint64 address, int reg, int base_register,
+ long offset) override;
+ virtual bool ValOffsetRule(uint64 address, int reg, int base_register,
+ long offset) override;
+ virtual bool RegisterRule(uint64 address, int reg,
+ int base_register) override;
+ virtual bool ExpressionRule(uint64 address, int reg,
+ const std::string& expression) override;
+ virtual bool ValExpressionRule(uint64 address, int reg,
+ const std::string& expression) override;
+ virtual bool End() override;
+
+ private:
+ // Return the name to use for register I.
+ const UniqueString* RegisterName(int i);
+
+ // The Summariser to which we should give entries
+ Summariser* summ_;
+
+ // Universe for creating UniqueStrings in, should that be necessary.
+ UniqueStringUniverse* usu_;
+
+ // The number of Dwarf-defined register names for this architecture.
+ const unsigned int num_dw_regs_;
+
+ // The reporter to use to report problems.
+ Reporter* reporter_;
+
+ // The ByteReader to use for parsing Dwarf expressions.
+ ByteReader* reader_;
+
+ // The section offset of the current frame description entry, for
+ // use in error messages.
+ size_t entry_offset_;
+
+ // The return address column for that entry.
+ unsigned return_address_;
+};
+
+// Convert the DWARF expression in |expr| into PfxInstrs stored in the
+// SecMap referred to by |summ|. On success, return the index (>= 0) of
+// the first PfxInstr added; on failure, return -1.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader,
+ std::string expr, bool debug, bool pushCfaAtStart,
+ bool derefAtEnd);
+
+} // namespace lul
+
+#endif // LulDwarfExt_h
diff --git a/mozglue/baseprofiler/lul/LulDwarfInt.h b/mozglue/baseprofiler/lul/LulDwarfInt.h
new file mode 100644
index 0000000000..b72c6e08e3
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfInt.h
@@ -0,0 +1,193 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following file in
+// toolkit/crashreporter/google-breakpad:
+// src/common/dwarf/dwarf2enums.h
+
+#ifndef LulDwarfInt_h
+#define LulDwarfInt_h
+
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+
+namespace lul {
+
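+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Only the call frame instruction, 'z' augmentation and expression
+// opcode enums are declared here; there are no Dwarf tag names or codes.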
+
+// Opcodes of the Dwarf Call Frame Information (CFI) instructions, listed
+// in increasing numeric order within each group.
+enum DwarfCFI {
+  // Opcodes 0x00 .. 0x16.
+  DW_CFA_nop = 0x00,
+  DW_CFA_set_loc = 0x01,
+  DW_CFA_advance_loc1 = 0x02,
+  DW_CFA_advance_loc2 = 0x03,
+  DW_CFA_advance_loc4 = 0x04,
+  DW_CFA_offset_extended = 0x05,
+  DW_CFA_restore_extended = 0x06,
+  DW_CFA_undefined = 0x07,
+  DW_CFA_same_value = 0x08,
+  DW_CFA_register = 0x09,
+  DW_CFA_remember_state = 0x0a,
+  DW_CFA_restore_state = 0x0b,
+  DW_CFA_def_cfa = 0x0c,
+  DW_CFA_def_cfa_register = 0x0d,
+  DW_CFA_def_cfa_offset = 0x0e,
+  DW_CFA_def_cfa_expression = 0x0f,
+  DW_CFA_expression = 0x10,
+  DW_CFA_offset_extended_sf = 0x11,
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+
+  // Bounds of the range reserved for user extensions.
+  DW_CFA_lo_user = 0x1c,
+  DW_CFA_hi_user = 0x3f,
+
+  // SGI/MIPS specific.
+  DW_CFA_MIPS_advance_loc8 = 0x1d,
+
+  // GNU extensions.
+  DW_CFA_GNU_window_save = 0x2d,
+  DW_CFA_GNU_args_size = 0x2e,
+  DW_CFA_GNU_negative_offset_extended = 0x2f,
+
+  // The three opcodes whose values lie at or above 0x40.
+  DW_CFA_advance_loc = 0x40,
+  DW_CFA_offset = 0x80,
+  DW_CFA_restore = 0xc0
+};
+
+// Letters that may appear in an exception handling 'z' augmentation string.
+enum DwarfZAugmentationCodes {
+  // A CFI augmentation string that begins with this letter means the CIE
+  // and FDE carry an augmentation data area immediately before their
+  // instructions; the letters that follow determine what it contains.
+  DW_Z_augmentation_start = 'z',
+
+  // 'L': the CIE augmentation data holds a pointer encoding, and the FDE
+  // augmentation data holds a language-specific data area pointer that is
+  // represented using that encoding.
+  DW_Z_has_LSDA = 'L',
+
+  // 'P': the CIE augmentation data holds a pointer encoding followed by a
+  // pointer to a personality routine, represented using that encoding.
+  DW_Z_has_personality_routine = 'P',
+
+  // 'R': the CIE augmentation data holds a pointer encoding that describes
+  // how the FDE's initial location, address range and DW_CFA_set_loc
+  // operands are encoded.
+  DW_Z_has_FDE_address_encoding = 'R',
+
+  // 'S': code addresses covered by FDEs citing this CIE are signal
+  // delivery trampolines.  Return addresses of frames in trampolines
+  // should not be adjusted as described in section 6.4.4 of the DWARF 3
+  // spec.
+  DW_Z_is_signal_trampoline = 'S'
+};
+
+// Opcodes of Dwarf expressions, listed in increasing numeric order.
+enum DwarfExpressionOpcodes {
+  // Address, dereference and constants.
+  DW_OP_addr = 0x03,
+  DW_OP_deref = 0x06,
+  DW_OP_const1s = 0x09,
+  DW_OP_const2u = 0x0a,
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu = 0x10,
+  DW_OP_consts = 0x11,
+
+  // 0x12 .. 0x18.
+  DW_OP_dup = 0x12,
+  DW_OP_drop = 0x13,
+  DW_OP_over = 0x14,
+  DW_OP_pick = 0x15,
+  DW_OP_swap = 0x16,
+  DW_OP_rot = 0x17,
+  DW_OP_xderef = 0x18,
+
+  // 0x19 .. 0x27.
+  DW_OP_abs = 0x19,
+  DW_OP_and = 0x1a,
+  DW_OP_div = 0x1b,
+  DW_OP_minus = 0x1c,
+  DW_OP_mod = 0x1d,
+  DW_OP_mul = 0x1e,
+  DW_OP_neg = 0x1f,
+  DW_OP_not = 0x20,
+  DW_OP_or = 0x21,
+  DW_OP_plus = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl = 0x24,
+  DW_OP_shr = 0x25,
+  DW_OP_shra = 0x26,
+  DW_OP_xor = 0x27,
+
+  // 0x28 .. 0x2f.
+  DW_OP_bra = 0x28,
+  DW_OP_eq = 0x29,
+  DW_OP_ge = 0x2a,
+  DW_OP_gt = 0x2b,
+  DW_OP_le = 0x2c,
+  DW_OP_lt = 0x2d,
+  DW_OP_ne = 0x2e,
+  DW_OP_skip = 0x2f,
+
+  // First and last member of each of the lit, reg and breg families.
+  DW_OP_lit0 = 0x30,
+  DW_OP_lit31 = 0x4f,
+  DW_OP_reg0 = 0x50,
+  DW_OP_reg31 = 0x6f,
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg31 = 0x8f,
+
+  // 0x90 .. 0x9d.
+  DW_OP_regx = 0x90,
+  DW_OP_fbreg = 0x91,
+  DW_OP_bregx = 0x92,
+  DW_OP_piece = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop = 0x96,
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2 = 0x98,
+  DW_OP_call4 = 0x99,
+  DW_OP_call_ref = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+
+  // Bounds of the user range.
+  DW_OP_lo_user = 0xe0,
+  DW_OP_hi_user = 0xff
+};
+
+} // namespace lul
+
+#endif // LulDwarfInt_h
diff --git a/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp b/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp
new file mode 100644
index 0000000000..ff0f212f6c
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp
@@ -0,0 +1,553 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulDwarfSummariser.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulDwarfExt.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_SUMMARISER 0
+
+namespace lul {
+
+// Returns true iff sign-extending the lowest 32 bits of |s64| gives back
+// |s64| itself.
+static inline bool fitsIn32Bits(int64 s64) {
+  // Keep only the low 32 bits ...
+  const auto low32 = s64 & 0xffffffff;
+  // ... and sign-extend them from bit 31 (flip the sign bit, then remove
+  // the bias this introduces).
+  const auto signExtended = (low32 ^ 0x80000000) - 0x80000000;
+  return s64 == signExtended;
+}
+
+// Validate the LExpr prefix expression that begins at pfxInstrs[start] and
+// runs up to the next PX_End instruction.  Two things are checked:
+//   * |start| is a valid index into |pfxInstrs|
+//   * every register the expression mentions is tracked on this target
+// Returns nullptr when the expression is acceptable, otherwise a pointer
+// to constant text describing the problem.
+static const char* checkPfxExpr(const vector<PfxInstr>* pfxInstrs,
+                                int64_t start) {
+  const size_t count = pfxInstrs->size();
+  const bool startInRange = start >= 0 && start < (ssize_t)count;
+  if (!startInRange) {
+    return "bogus start point";
+  }
+
+  size_t ix = start;
+  while (ix < count) {
+    const PfxInstr& instr = (*pfxInstrs)[ix];
+    if (instr.mOpcode == PX_End) {
+      // Reached the terminator without finding a problem.
+      return nullptr;
+    }
+    const bool usesUntracked =
+        instr.mOpcode == PX_DwReg &&
+        !registerIsTracked((DW_REG_NUMBER)instr.mOperand);
+    if (usesUntracked) {
+      return "uses untracked reg";
+    }
+    ix++;
+  }
+
+  // Ran off the end of the vector without seeing PX_End; this is also
+  // treated as success.
+  return nullptr;
+}
+
+// Set up a summariser that adds its results to |aSecMap|, biases incoming
+// addresses by |aTextBias| and sends debug text to |aLog|.
+Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias,
+                       void (*aLog)(const char*))
+    : mSecMap(aSecMap),
+      mCurrAddr(0),
+      mMax1Addr(0),  // Equal to mCurrAddr, so the initial range is empty.
+      mTextBias(aTextBias),
+      mLog(aLog) {
+  // Put the running RuleSet into its "haven't got a clue" state.
+  new (&mCurrRules) RuleSet();
+}
+
+// Begin a new summary covering |aLength| bytes starting at |aAddress|
+// (to which the text bias is added).
+void Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) {
+  const uintptr_t biasedStart = aAddress + mTextBias;
+  if (DEBUG_SUMMARISER) {
+    char buf[100];
+    SprintfLiteral(buf, "LUL Entry(%llx, %llu)\n",
+                   (unsigned long long int)biasedStart,
+                   (unsigned long long int)aLength);
+    mLog(buf);
+  }
+
+  // Any previous summary is discarded here; it is assumed to have been
+  // properly finished already by a call to End().
+  new (&mCurrRules) RuleSet();
+  mCurrAddr = biasedStart;
+  mMax1Addr = biasedStart + aLength;
+}
+
+// Record that, from |aAddress| onwards, the caller's value of register
+// |aNewReg| is recovered by LExpr(how, oldReg, offset).
+//
+// The text bias is added to |aAddress| first. If the biased address lies
+// beyond mCurrAddr, the rule set accumulated so far is emitted into
+// mSecMap for the range [mCurrAddr, aAddress) and mCurrAddr is advanced.
+// The new rule is then checked by the architecture-specific code below.
+// If it is acceptable it is folded into mCurrRules and default
+// expressions are filled in for registers that are still UNKNOWN. If it
+// is not, control jumps to |cant_summarise|, no register expression is
+// updated, and a message is logged provided a reason was recorded.
+void Summariser::Rule(uintptr_t aAddress, int aNewReg, LExprHow how,
+ int16_t oldReg, int64_t offset) {
+ aAddress += mTextBias;
+ if (DEBUG_SUMMARISER) {
+ char buf[100];
+ if (how == NODEREF || how == DEREF) {
+ bool deref = how == DEREF;
+ SprintfLiteral(buf, "LUL 0x%llx old-r%d = %sr%d + %lld%s\n",
+ (unsigned long long int)aAddress, aNewReg,
+ deref ? "*(" : "", (int)oldReg, (long long int)offset,
+ deref ? ")" : "");
+ } else if (how == PFXEXPR) {
+ SprintfLiteral(buf, "LUL 0x%llx old-r%d = pfx-expr-at %lld\n",
+ (unsigned long long int)aAddress, aNewReg,
+ (long long int)offset);
+ } else {
+ SprintfLiteral(buf, "LUL 0x%llx old-r%d = (invalid LExpr!)\n",
+ (unsigned long long int)aAddress, aNewReg);
+ }
+ mLog(buf);
+ }
+
+ if (mCurrAddr < aAddress) {
+ // Flush the existing summary first.
+ mCurrRules.mAddr = mCurrAddr;
+ mCurrRules.mLen = aAddress - mCurrAddr;
+ mSecMap->AddRuleSet(&mCurrRules);
+ if (DEBUG_SUMMARISER) {
+ mLog("LUL ");
+ mCurrRules.Print(mLog);
+ mLog("\n");
+ }
+ mCurrAddr = aAddress;
+ }
+
+ // If for some reason summarisation fails, either or both of these
+ // become non-null and point at constant text describing the
+ // problem. Using two rather than just one avoids complications of
+ // having to concatenate two strings to produce a complete error message.
+ const char* reason1 = nullptr;
+ const char* reason2 = nullptr;
+
+ // |offset| needs to be a 32 bit value that sign extends to 64 bits
+ // on a 64 bit target. We will need to incorporate |offset| into
+ // any LExpr made here. So we may as well check it right now.
+ if (!fitsIn32Bits(offset)) {
+ reason1 = "offset not in signed 32-bit range";
+ goto cant_summarise;
+ }
+
+ // FIXME: factor out common parts of the arch-dependent summarisers.
+
+#if defined(GP_ARCH_arm)
+
+ // ----------------- arm ----------------- //
+
+ // Now, can we add the rule to our summary? This depends on whether
+ // the registers and the overall expression are representable. This
+ // is the heart of the summarisation process.
+ switch (aNewReg) {
+ case DW_REG_CFA:
+ // This is a rule that defines the CFA. The only forms we
+ // choose to represent are: r7/11/12/13 + offset. The offset
+ // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM,
+ // hence there is no need to check it for overflow.
+ if (how != NODEREF) {
+ reason1 = "rule for DW_REG_CFA: invalid |how|";
+ goto cant_summarise;
+ }
+ switch (oldReg) {
+ case DW_REG_ARM_R7:
+ case DW_REG_ARM_R11:
+ case DW_REG_ARM_R12:
+ case DW_REG_ARM_R13:
+ break;
+ default:
+ reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+ goto cant_summarise;
+ }
+ mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+ break;
+
+ case DW_REG_ARM_R7:
+ case DW_REG_ARM_R11:
+ case DW_REG_ARM_R12:
+ case DW_REG_ARM_R13:
+ case DW_REG_ARM_R14:
+ case DW_REG_ARM_R15: {
+ // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or
+ // R15 (the return address).
+ switch (how) {
+ case NODEREF:
+ case DEREF:
+ // Check the old register is one we're tracking.
+ if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+ oldReg != DW_REG_CFA) {
+ reason1 = "rule for R7/11/12/13/14/15: uses untracked reg";
+ goto cant_summarise;
+ }
+ break;
+ case PFXEXPR: {
+ // Check that the prefix expression only mentions tracked registers.
+ const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+ reason2 = checkPfxExpr(pfxInstrs, offset);
+ if (reason2) {
+ reason1 = "rule for R7/11/12/13/14/15: ";
+ goto cant_summarise;
+ }
+ break;
+ }
+ default:
+ goto cant_summarise;
+ }
+ LExpr expr = LExpr(how, oldReg, offset);
+ switch (aNewReg) {
+ case DW_REG_ARM_R7:
+ mCurrRules.mR7expr = expr;
+ break;
+ case DW_REG_ARM_R11:
+ mCurrRules.mR11expr = expr;
+ break;
+ case DW_REG_ARM_R12:
+ mCurrRules.mR12expr = expr;
+ break;
+ case DW_REG_ARM_R13:
+ mCurrRules.mR13expr = expr;
+ break;
+ case DW_REG_ARM_R14:
+ mCurrRules.mR14expr = expr;
+ break;
+ case DW_REG_ARM_R15:
+ mCurrRules.mR15expr = expr;
+ break;
+ default:
+ MOZ_ASSERT(0);
+ }
+ break;
+ }
+
+ default:
+ // Leave |reason1| and |reason2| unset here. This program point
+ // is reached so often that it causes a flood of "Can't
+ // summarise" messages. In any case, we don't really care about
+ // the fact that this summary would produce a new value for a
+ // register that we're not tracking. We do on the other hand
+ // care if the summary's expression *uses* a register that we're
+ // not tracking. But in that case one of the above failures
+ // should tell us which.
+ goto cant_summarise;
+ }
+
+ // Mark callee-saved registers (r4 .. r11) as unchanged, if there is
+ // no other information about them. FIXME: do this just once, at
+ // the point where the ruleset is committed.
+ if (mCurrRules.mR7expr.mHow == UNKNOWN) {
+ mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0);
+ }
+ if (mCurrRules.mR11expr.mHow == UNKNOWN) {
+ mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0);
+ }
+ if (mCurrRules.mR12expr.mHow == UNKNOWN) {
+ mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0);
+ }
+
+ // The old r13 (SP) value before the call is always the same as the
+ // CFA.
+ // Note that this assignment is unconditional, so it replaces any R13
+ // expression stored by the switch above.
+ mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0);
+
+ // If there's no information about R15 (the return address), say
+ // it's a copy of R14 (the link register).
+ if (mCurrRules.mR15expr.mHow == UNKNOWN) {
+ mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0);
+ }
+
+#elif defined(GP_ARCH_arm64)
+
+ // ----------------- arm64 ----------------- //
+
+ switch (aNewReg) {
+ case DW_REG_CFA:
+ // This is a rule that defines the CFA. The only forms accepted
+ // here are: X29 + offset or SP + offset.
+ if (how != NODEREF) {
+ reason1 = "rule for DW_REG_CFA: invalid |how|";
+ goto cant_summarise;
+ }
+ switch (oldReg) {
+ case DW_REG_AARCH64_X29:
+ case DW_REG_AARCH64_SP:
+ break;
+ default:
+ reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+ goto cant_summarise;
+ }
+ mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+ break;
+
+ case DW_REG_AARCH64_X29:
+ case DW_REG_AARCH64_X30:
+ case DW_REG_AARCH64_SP: {
+ // This is a new rule for X29, X30 or SP.
+ switch (how) {
+ case NODEREF:
+ case DEREF:
+ // Check the old register is one we're tracking.
+ if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+ oldReg != DW_REG_CFA) {
+ reason1 = "rule for X29/X30/SP: uses untracked reg";
+ goto cant_summarise;
+ }
+ break;
+ case PFXEXPR: {
+ // Check that the prefix expression only mentions tracked registers.
+ const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+ reason2 = checkPfxExpr(pfxInstrs, offset);
+ if (reason2) {
+ reason1 = "rule for X29/X30/SP: ";
+ goto cant_summarise;
+ }
+ break;
+ }
+ default:
+ goto cant_summarise;
+ }
+ LExpr expr = LExpr(how, oldReg, offset);
+ switch (aNewReg) {
+ case DW_REG_AARCH64_X29:
+ mCurrRules.mX29expr = expr;
+ break;
+ case DW_REG_AARCH64_X30:
+ mCurrRules.mX30expr = expr;
+ break;
+ case DW_REG_AARCH64_SP:
+ mCurrRules.mSPexpr = expr;
+ break;
+ default:
+ MOZ_ASSERT(0);
+ }
+ break;
+ }
+ default:
+ // Leave |reason1| and |reason2| unset here, for the reasons explained
+ // in the analogous point in the ARM case above.
+ goto cant_summarise;
+ }
+
+ // If there is no information about X29 or X30, say they are unchanged.
+ if (mCurrRules.mX29expr.mHow == UNKNOWN) {
+ mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0);
+ }
+ if (mCurrRules.mX30expr.mHow == UNKNOWN) {
+ mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0);
+ }
+ // On aarch64, it seems the old SP value before the call is always the
+ // same as the CFA. Therefore, in the absence of any other way to
+ // recover the SP, specify that the CFA should be copied.
+ if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+ mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+ }
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+
+ // ---------------- x64/x86 ---------------- //
+
+ // Now, can we add the rule to our summary? This depends on whether
+ // the registers and the overall expression are representable. This
+ // is the heart of the summarisation process.
+ switch (aNewReg) {
+ case DW_REG_CFA: {
+ // This is a rule that defines the CFA. The only forms we choose to
+ // represent are: = SP+offset, = FP+offset, or =prefix-expr.
+ switch (how) {
+ case NODEREF:
+ if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) {
+ reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+ goto cant_summarise;
+ }
+ break;
+ case DEREF:
+ reason1 = "rule for DW_REG_CFA: invalid |how|";
+ goto cant_summarise;
+ case PFXEXPR: {
+ // Check that the prefix expression only mentions tracked registers.
+ const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+ reason2 = checkPfxExpr(pfxInstrs, offset);
+ if (reason2) {
+ reason1 = "rule for CFA: ";
+ goto cant_summarise;
+ }
+ break;
+ }
+ default:
+ goto cant_summarise;
+ }
+ mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+ break;
+ }
+
+ case DW_REG_INTEL_XSP:
+ case DW_REG_INTEL_XBP:
+ case DW_REG_INTEL_XIP: {
+ // This is a new rule for XSP, XBP or XIP (the return address).
+ switch (how) {
+ case NODEREF:
+ case DEREF:
+ // Check the old register is one we're tracking.
+ if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+ oldReg != DW_REG_CFA) {
+ reason1 = "rule for XSP/XBP/XIP: uses untracked reg";
+ goto cant_summarise;
+ }
+ break;
+ case PFXEXPR: {
+ // Check that the prefix expression only mentions tracked registers.
+ const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+ reason2 = checkPfxExpr(pfxInstrs, offset);
+ if (reason2) {
+ reason1 = "rule for XSP/XBP/XIP: ";
+ goto cant_summarise;
+ }
+ break;
+ }
+ default:
+ goto cant_summarise;
+ }
+ LExpr expr = LExpr(how, oldReg, offset);
+ switch (aNewReg) {
+ case DW_REG_INTEL_XBP:
+ mCurrRules.mXbpExpr = expr;
+ break;
+ case DW_REG_INTEL_XSP:
+ mCurrRules.mXspExpr = expr;
+ break;
+ case DW_REG_INTEL_XIP:
+ mCurrRules.mXipExpr = expr;
+ break;
+ default:
+ MOZ_CRASH("impossible value for aNewReg");
+ }
+ break;
+ }
+
+ default:
+ // Leave |reason1| and |reason2| unset here, for the reasons
+ // explained in the analogous point in the ARM case just above.
+ goto cant_summarise;
+ }
+
+ // On Intel, it seems the old SP value before the call is always the
+ // same as the CFA. Therefore, in the absence of any other way to
+ // recover the SP, specify that the CFA should be copied.
+ if (mCurrRules.mXspExpr.mHow == UNKNOWN) {
+ mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0);
+ }
+
+ // Also, gcc says "Undef" for BP when it is unchanged.
+ if (mCurrRules.mXbpExpr.mHow == UNKNOWN) {
+ mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0);
+ }
+
+#elif defined(GP_ARCH_mips64)
+ // ---------------- mips ---------------- //
+ //
+ // Now, can we add the rule to our summary? This depends on whether
+ // the registers and the overall expression are representable. This
+ // is the heart of the summarisation process.
+ switch (aNewReg) {
+ case DW_REG_CFA:
+ // This is a rule that defines the CFA. The only forms we can
+ // represent are: = SP+offset or = FP+offset.
+ if (how != NODEREF) {
+ reason1 = "rule for DW_REG_CFA: invalid |how|";
+ goto cant_summarise;
+ }
+ if (oldReg != DW_REG_MIPS_SP && oldReg != DW_REG_MIPS_FP) {
+ reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+ goto cant_summarise;
+ }
+ mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+ break;
+
+ case DW_REG_MIPS_SP:
+ case DW_REG_MIPS_FP:
+ case DW_REG_MIPS_PC: {
+ // This is a new rule for SP, FP or PC (the return address).
+ switch (how) {
+ case NODEREF:
+ case DEREF:
+ // Check the old register is one we're tracking.
+ if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+ oldReg != DW_REG_CFA) {
+ reason1 = "rule for SP/FP/PC: uses untracked reg";
+ goto cant_summarise;
+ }
+ break;
+ case PFXEXPR: {
+ // Check that the prefix expression only mentions tracked registers.
+ const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+ reason2 = checkPfxExpr(pfxInstrs, offset);
+ if (reason2) {
+ reason1 = "rule for SP/FP/PC: ";
+ goto cant_summarise;
+ }
+ break;
+ }
+ default:
+ goto cant_summarise;
+ }
+ LExpr expr = LExpr(how, oldReg, offset);
+ switch (aNewReg) {
+ case DW_REG_MIPS_FP:
+ mCurrRules.mFPexpr = expr;
+ break;
+ case DW_REG_MIPS_SP:
+ mCurrRules.mSPexpr = expr;
+ break;
+ case DW_REG_MIPS_PC:
+ mCurrRules.mPCexpr = expr;
+ break;
+ default:
+ MOZ_CRASH("impossible value for aNewReg");
+ }
+ break;
+ }
+ default:
+ // Leave |reason1| and |reason2| unset here, for the reasons
+ // explained in the analogous point in the ARM case just above.
+ goto cant_summarise;
+ }
+
+ // On MIPS, it seems the old SP value before the call is always the
+ // same as the CFA. Therefore, in the absence of any other way to
+ // recover the SP, specify that the CFA should be copied.
+ if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+ mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+ }
+
+ // Also, gcc says "Undef" for FP when it is unchanged.
+ if (mCurrRules.mFPexpr.mHow == UNKNOWN) {
+ mCurrRules.mFPexpr = LExpr(NODEREF, DW_REG_MIPS_FP, 0);
+ }
+
+#else
+
+# error "Unsupported arch"
+#endif
+
+ // Success: the rule has been folded into mCurrRules.
+ return;
+
+cant_summarise:
+ // Failure: report it, but only if one of the checks above recorded a
+ // reason. The address printed has the text bias removed again.
+ if (reason1 || reason2) {
+ char buf[200];
+ SprintfLiteral(buf,
+ "LUL can't summarise: "
+ "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n",
+ (unsigned long long int)(aAddress - mTextBias),
+ reason1 ? reason1 : "", reason2 ? reason2 : "",
+ NameOf_LExprHow(how), (unsigned int)oldReg,
+ (long long int)offset);
+ mLog(buf);
+ }
+}
+
+// Hand |pfxi| to the SecMap and pass back whatever value it returns.
+uint32_t Summariser::AddPfxInstr(PfxInstr pfxi) {
+  const uint32_t result = mSecMap->AddPfxInstr(pfxi);
+  return result;
+}
+
+// Finish the current summary: if the range [mCurrAddr, mMax1Addr) is
+// non-empty, emit the running rule set for it into the SecMap.
+void Summariser::End() {
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL End\n");
+  }
+
+  // Nothing is left to emit when the remaining range is empty.
+  if (mCurrAddr >= mMax1Addr) {
+    return;
+  }
+
+  mCurrRules.mAddr = mCurrAddr;
+  mCurrRules.mLen = mMax1Addr - mCurrAddr;
+  mSecMap->AddRuleSet(&mCurrRules);
+
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL ");
+    mCurrRules.Print(mLog);
+    mLog("\n");
+  }
+}
+
+} // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulDwarfSummariser.h b/mozglue/baseprofiler/lul/LulDwarfSummariser.h
new file mode 100644
index 0000000000..30f1ba23c1
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfSummariser.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulDwarfSummariser_h
+#define LulDwarfSummariser_h
+
+#include "LulMainInt.h"
+
+namespace lul {
+
+// Accumulates register-recovery rules for one address range at a time and
+// parks the resulting summaries (RuleSets) in a SecMap. A summary is
+// started with Entry(), extended with Rule(), and finished with End().
+class Summariser {
+ public:
+ // |aSecMap| receives the finished RuleSets and any PfxInstrs.
+ // |aTextBias| is added to every incoming address. |aLog| is the sink
+ // for debugging text.
+ Summariser(SecMap* aSecMap, uintptr_t aTextBias, void (*aLog)(const char*));
+
+ // Start a new summary for the |aLength| bytes beginning at |aAddress|.
+ // Any summary still under construction is thrown away.
+ virtual void Entry(uintptr_t aAddress, uintptr_t aLength);
+ // Finish the current summary, emitting the rule set for whatever part
+ // of its address range has not been emitted yet.
+ virtual void End();
+
+ // Tell the summariser that the value for |aNewReg| at |aAddress| is
+ // recovered using the LExpr that can be constructed using the
+ // components |how|, |oldReg| and |offset|. The summariser will
+ // inspect the components and may reject them for various reasons,
+ // but the hope is that it will find them acceptable and record this
+ // rule permanently.
+ virtual void Rule(uintptr_t aAddress, int aNewReg, LExprHow how,
+ int16_t oldReg, int64_t offset);
+
+ // Add |pfxi| to the SecMap and return the value the SecMap reports.
+ virtual uint32_t AddPfxInstr(PfxInstr pfxi);
+
+ // Send output to the logging sink, for debugging.
+ virtual void Log(const char* str) { mLog(str); }
+
+ private:
+ // The SecMap in which we park the finished summaries (RuleSets) and
+ // also any PfxInstrs derived from Dwarf expressions.
+ SecMap* mSecMap;
+
+ // Running state for the current summary (RuleSet) under construction.
+ RuleSet mCurrRules;
+
+ // The start of the address range to which the RuleSet under
+ // construction applies.
+ uintptr_t mCurrAddr;
+
+ // The highest address, plus one, for which the RuleSet under
+ // construction could possibly apply. If there are no further
+ // incoming events then mCurrRules will eventually be emitted
+ // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is
+ // nonempty.
+ uintptr_t mMax1Addr;
+
+ // The bias value (to add to the SVMAs, to get AVMAs) to be used
+ // when adding entries into mSecMap.
+ uintptr_t mTextBias;
+
+ // A logging sink, for debugging.
+ // NOTE(review): despite the parameter name |aFmt|, every call visible
+ // in the summariser passes already-formatted text -- confirm that
+ // sinks do not treat it as a format string.
+ void (*mLog)(const char* aFmt);
+};
+
+} // namespace lul
+
+#endif // LulDwarfSummariser_h
diff --git a/mozglue/baseprofiler/lul/LulElf.cpp b/mozglue/baseprofiler/lul/LulElf.cpp
new file mode 100644
index 0000000000..16866c5a41
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulElf.cpp
@@ -0,0 +1,871 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// (derived from)
+// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
+// Find all the debugging info in a file and dump it as a Breakpad symbol file.
+//
+// dump_symbols.h: Read debugging information from an ELF file, and write
+// it out as a Breakpad symbol file.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/linux/dump_symbols.cc
+// src/common/linux/elfutils.cc
+// src/common/linux/file_id.cc
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+#include <cstdlib>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "PlatformMacros.h"
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+#include "LulElfInt.h"
+#include "LulMainInt.h"
+
+#if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
+// bionic and older glibc versions don't define it
+# define SHT_ARM_EXIDX (SHT_LOPROC + 1)
+#endif
+
+// Old Linux header doesn't define EM_AARCH64
+#ifndef EM_AARCH64
+# define EM_AARCH64 183
+#endif
+
+// This namespace contains helper functions.
+namespace {
+
+using lul::DwarfCFIToModule;
+using lul::FindElfSectionByName;
+using lul::GetOffset;
+using lul::IsValidElf;
+using lul::Summariser;
+using lul::UniqueStringUniverse;
+using std::set;
+using std::string;
+using std::vector;
+
+//
+// FDWrapper
+//
+// Wrapper class to make sure opened file is closed.
+//
+// The wrapper owns the descriptor it is given and closes it on
+// destruction unless release() has been called. Copying is disabled
+// because two wrappers holding the same descriptor would close it twice.
+//
+class FDWrapper {
+ public:
+  // Take ownership of |fd|. A value of -1 means "no descriptor".
+  explicit FDWrapper(int fd) : fd_(fd) {}
+  ~FDWrapper() {
+    if (fd_ != -1) close(fd_);
+  }
+
+  FDWrapper(const FDWrapper&) = delete;
+  FDWrapper& operator=(const FDWrapper&) = delete;
+
+  // Return the wrapped descriptor without giving up ownership.
+  int get() const { return fd_; }
+
+  // Give up ownership: return the descriptor and leave the wrapper
+  // holding -1, so that the destructor will not close it.
+  int release() {
+    int fd = fd_;
+    fd_ = -1;
+    return fd;
+  }
+
+ private:
+  int fd_;
+};
+
+//
+// MmapWrapper
+//
+// Wrapper class to make sure mapped regions are unmapped.
+//
+// Once set() has been called the wrapper unmaps the region on
+// destruction, unless release() is called first. Copying is disabled
+// because two wrappers holding the same region would unmap it twice.
+//
+class MmapWrapper {
+ public:
+  MmapWrapper() : is_set_(false), base_(nullptr), size_(0) {}
+  ~MmapWrapper() {
+    if (is_set_ && base_ != nullptr) {
+      MOZ_ASSERT(size_ > 0);
+      munmap(base_, size_);
+    }
+  }
+
+  MmapWrapper(const MmapWrapper&) = delete;
+  MmapWrapper& operator=(const MmapWrapper&) = delete;
+
+  // Record the region of |mapped_size| bytes at |mapped_address| as the
+  // one to unmap. Any region recorded earlier is simply overwritten; it
+  // is not unmapped here.
+  void set(void* mapped_address, size_t mapped_size) {
+    is_set_ = true;
+    base_ = mapped_address;
+    size_ = mapped_size;
+  }
+
+  // Forget the recorded region without unmapping it. Must only be called
+  // after set().
+  void release() {
+    MOZ_ASSERT(is_set_);
+    is_set_ = false;
+    base_ = nullptr;
+    size_ = 0;
+  }
+
+ private:
+  bool is_set_;
+  void* base_;
+  size_t size_;
+};
+
+// Look up how many Dwarf register names exist for the machine
+// architecture recorded in HEADER and store that count in
+// NUM_DW_REGNAMES. Returns true on success. For an unsupported
+// architecture NUM_DW_REGNAMES is left untouched and false is returned.
+template <typename ElfClass>
+bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
+                           unsigned int* num_dw_regnames) {
+  unsigned int count = 0;
+  switch (elf_header->e_machine) {
+    case EM_386:
+      count = DwarfCFIToModule::RegisterNames::I386();
+      break;
+    case EM_ARM:
+      count = DwarfCFIToModule::RegisterNames::ARM();
+      break;
+    case EM_X86_64:
+      count = DwarfCFIToModule::RegisterNames::X86_64();
+      break;
+    case EM_MIPS:
+      count = DwarfCFIToModule::RegisterNames::MIPS();
+      break;
+    case EM_AARCH64:
+      count = DwarfCFIToModule::RegisterNames::ARM64();
+      break;
+    default:
+      MOZ_ASSERT(0);
+      return false;
+  }
+  *num_dw_regnames = count;
+  return true;
+}
+
+// Parse the call frame information held in |section| (whose name is
+// |section_name|, and which is treated as .eh_frame data when |eh_frame|
+// is true) and feed the results, through a Summariser, into |smap|.
+// Returns false only when the machine architecture in |elf_header| is not
+// recognised; otherwise returns true once the parser has been run.
+template <typename ElfClass>
+bool LoadDwarfCFI(const string& dwarf_filename,
+                  const typename ElfClass::Ehdr* elf_header,
+                  const char* section_name,
+                  const typename ElfClass::Shdr* section, const bool eh_frame,
+                  const typename ElfClass::Shdr* got_section,
+                  const typename ElfClass::Shdr* text_section,
+                  const bool big_endian, SecMap* smap, uintptr_t text_bias,
+                  UniqueStringUniverse* usu, void (*log)(const char*)) {
+  // How many Dwarf register names does this file's architecture have?
+  unsigned int reg_count = 0;
+  const bool arch_known =
+      DwarfCFIRegisterNames<ElfClass>(elf_header, &reg_count);
+  if (!arch_known) {
+    fprintf(stderr,
+            "%s: unrecognized ELF machine architecture '%d';"
+            " cannot convert DWARF call frame information\n",
+            dwarf_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  lul::Endianness byte_order = lul::ENDIANNESS_LITTLE;
+  if (big_endian) {
+    byte_order = lul::ENDIANNESS_BIG;
+  }
+
+  // Locate the call frame information and note its size.
+  const char* cfi_bytes =
+      GetOffset<ElfClass, char>(elf_header, section->sh_offset);
+  const size_t cfi_length = section->sh_size;
+
+  // Wire up the pipeline: parser -> handler -> summariser -> |smap|.
+
+  // The summariser receives the parser's output, builds summaries from
+  // it, and adds them to |smap|.
+  Summariser summariser(smap, text_bias, log);
+
+  lul::ByteReader byte_reader(byte_order);
+  byte_reader.SetAddressSize(ElfClass::kAddrSize);
+
+  DwarfCFIToModule::Reporter handler_reporter(log, dwarf_filename,
+                                              section_name);
+  DwarfCFIToModule cfi_handler(reg_count, &handler_reporter, &byte_reader, usu,
+                               &summariser);
+
+  // Supply the base addresses for .eh_frame encoded pointers, where they
+  // are available.
+  byte_reader.SetCFIDataBase(section->sh_addr, cfi_bytes);
+  if (got_section) {
+    byte_reader.SetDataBase(got_section->sh_addr);
+  }
+  if (text_section) {
+    byte_reader.SetTextBase(text_section->sh_addr);
+  }
+
+  lul::CallFrameInfo::Reporter parser_reporter(log, dwarf_filename,
+                                               section_name);
+  lul::CallFrameInfo cfi_parser(cfi_bytes, cfi_length, &byte_reader,
+                                &cfi_handler, &parser_reporter, eh_frame);
+  cfi_parser.Start();
+
+  return true;
+}
+
+// Open |obj_file|, map its entire contents read-only and hand the mapping to
+// |map_wrapper|.  On success |*elf_header| points at the start of the mapping
+// and the function returns true.
+//
+// Returns false, after printing a diagnostic to stderr, if the file cannot
+// be opened, stat'ed or mapped, if it is empty, or if it does not start with
+// the ELF magic.  In that last case the mapping has already been handed to
+// |map_wrapper| and |*elf_header| has already been written.
+bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
+             void** elf_header) {
+  int obj_fd = open(obj_file.c_str(), O_RDONLY);
+  if (obj_fd < 0) {
+    fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  FDWrapper obj_fd_wrapper(obj_fd);
+  struct stat st;
+  // Test the fstat() result on its own first: |st| holds no meaningful data
+  // unless the call succeeded, so its fields must not be read before that.
+  if (fstat(obj_fd, &st) != 0) {
+    fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  // An empty file can never be a valid ELF image; reject it here rather than
+  // passing a zero length to mmap().
+  if (st.st_size <= 0) {
+    fprintf(stderr, "ELF file '%s' is empty\n", obj_file.c_str());
+    return false;
+  }
+  // Mapping it read-only is good enough.  In any case, mapping it
+  // read-write confuses Valgrind's debuginfo acquire/discard
+  // heuristics, making it hard to profile the profiler.
+  void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0);
+  if (obj_base == MAP_FAILED) {
+    fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  map_wrapper->set(obj_base, st.st_size);
+  *elf_header = obj_base;
+  if (!IsValidElf(*elf_header)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
+    return false;
+  }
+  return true;
+}
+
+// Report the byte order recorded in ELF_HEADER's identification bytes.
+// Stores true in |*big_endian| for ELFDATA2MSB and false for ELFDATA2LSB and
+// returns true.  For any other encoding value, prints a diagnostic to stderr,
+// leaves |*big_endian| untouched and returns false.
+template <typename ElfClass>
+bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
+                   bool* big_endian) {
+  switch (elf_header->e_ident[EI_DATA]) {
+    case ELFDATA2LSB:
+      *big_endian = false;
+      return true;
+    case ELFDATA2MSB:
+      *big_endian = true;
+      return true;
+    default:
+      break;
+  }
+
+  fprintf(stderr, "bad data encoding in ELF header: %d\n",
+          elf_header->e_ident[EI_DATA]);
+  return false;
+}
+
+//
+// LoadSymbolsInfo
+//
+// State shared between the two calls to LoadSymbols(): one for the object
+// itself and, if a .gnu_debuglink has to be followed, one for the separate
+// debug file.
+//
+template <typename ElfClass>
+class LoadSymbolsInfo {
+ public:
+  using Addr = typename ElfClass::Addr;
+
+  // Only a reference to |dbg_dirs| is kept, so the caller's vector must
+  // outlive this object.
+  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs)
+      : debug_dirs_(dbg_dirs), has_loading_addr_(false) {}
+
+  // Record that |section| has been loaded.  If it was already recorded (for
+  // instance from the other file), a message is printed to stderr instead.
+  void LoadedSection(const string& section) {
+    const bool newly_recorded = loaded_sections_.insert(section).second;
+    if (!newly_recorded) {
+      fprintf(stderr, "Section %s has already been loaded.\n", section.c_str());
+    }
+  }
+
+  // Full path to the debug ELF file.  Nothing in this class assigns it, so
+  // it is empty unless set by code outside this view.
+  string debuglink_file() const { return debuglink_file_; }
+
+ private:
+  const vector<string>& debug_dirs_;  // Directories in which to search for
+                                      // the debug ELF file.
+
+  string debuglink_file_;  // Full path to the debug ELF file.
+
+  bool has_loading_addr_;  // Initialised to false; not otherwise used by
+                           // this class.
+
+  set<string> loaded_sections_;  // Names of the ELF sections loaded so far,
+                                 // kept across calls to LoadSymbols().
+};
+
+// Return the preferred loading address of the binary: the p_vaddr of the
+// first PT_LOAD entry among the |nheader| program headers, or 0 if there is
+// no PT_LOAD entry.
+template <typename ElfClass>
+typename ElfClass::Addr GetLoadingAddress(
+    const typename ElfClass::Phdr* program_headers, int nheader) {
+  using Phdr = typename ElfClass::Phdr;
+
+  // For non-PIC executables (e_type == ET_EXEC), the load address is
+  // the start address of the first PT_LOAD segment.  (ELF requires
+  // the segments to be sorted by load address.)  For PIC executables
+  // and dynamic libraries (e_type == ET_DYN), this address will
+  // normally be zero.
+  int index = 0;
+  while (index < nheader) {
+    const Phdr* candidate = program_headers + index;
+    if (candidate->p_type == PT_LOAD) {
+      return candidate->p_vaddr;
+    }
+    ++index;
+  }
+  return 0;
+}
+
+// Read the call frame information found in the mapped ELF image
+// |elf_header| (from .debug_frame and/or .eh_frame) into |smap|, logging
+// progress through |log|.  Each section processed is recorded in |info|.
+//
+// |rx_avma| is used, together with the image's preferred loading address, to
+// compute the text bias handed to the CFI loader.  |read_gnu_debug_link| and
+// |rx_size| are accepted but not used by this function.
+//
+// Returns true if at least one of the two sections was loaded successfully.
+template <typename ElfClass>
+bool LoadSymbols(const string& obj_file, const bool big_endian,
+                 const typename ElfClass::Ehdr* elf_header,
+                 const bool read_gnu_debug_link,
+                 LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma,
+                 size_t rx_size, UniqueStringUniverse* usu,
+                 void (*log)(const char*)) {
+  using Phdr = typename ElfClass::Phdr;
+  using Shdr = typename ElfClass::Shdr;
+
+  char msg[500];
+  SprintfLiteral(msg, "LoadSymbols: BEGIN %s\n", obj_file.c_str());
+  msg[sizeof(msg) - 1] = 0;
+  log(msg);
+
+  // This is how the text bias is calculated.
+  // BEGIN CALCULATE BIAS
+  const Phdr* program_headers =
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+  uintptr_t loading_addr =
+      GetLoadingAddress<ElfClass>(program_headers, elf_header->e_phnum);
+  const uintptr_t mapped_addr = (uintptr_t)rx_avma;
+  uintptr_t text_bias = mapped_addr - loading_addr;
+  SprintfLiteral(msg, "LoadSymbols: rx_avma=%llx, text_bias=%llx",
+                 (unsigned long long int)mapped_addr,
+                 (unsigned long long int)text_bias);
+  msg[sizeof(msg) - 1] = 0;
+  log(msg);
+  // END CALCULATE BIAS
+
+  // Locate the section header table and the section-name string table.
+  const Shdr* sections =
+      GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+  const Shdr* name_table = sections + elf_header->e_shstrndx;
+  const char* names =
+      GetOffset<ElfClass, char>(elf_header, name_table->sh_offset);
+  const char* names_end = names + name_table->sh_size;
+  const int section_count = elf_header->e_shnum;
+  bool any_cfi_loaded = false;
+
+  // Dwarf Call Frame Information (CFI) is actually independent from
+  // the other DWARF debugging information, and can be used alone.
+  const Shdr* debug_frame = FindElfSectionByName<ElfClass>(
+      ".debug_frame", SHT_PROGBITS, sections, names, names_end, section_count);
+  if (debug_frame != nullptr) {
+    info->LoadedSection(".debug_frame");
+    // A failure here is not fatal: .eh_frame below may still provide
+    // usable unwind data.
+    const bool loaded = LoadDwarfCFI<ElfClass>(
+        obj_file, elf_header, ".debug_frame", debug_frame, false, nullptr,
+        nullptr, big_endian, smap, text_bias, usu, log);
+    if (loaded) {
+      any_cfi_loaded = true;
+      log("LoadSymbols: read CFI from .debug_frame");
+    }
+  }
+
+  // Linux C++ exception handling information can also provide
+  // unwinding data.
+  const Shdr* eh_frame = FindElfSectionByName<ElfClass>(
+      ".eh_frame", SHT_PROGBITS, sections, names, names_end, section_count);
+  if (eh_frame != nullptr) {
+    // Pointers in .eh_frame data may be relative to the base addresses of
+    // certain sections.  Provide those sections if present.
+    const Shdr* got_section = FindElfSectionByName<ElfClass>(
+        ".got", SHT_PROGBITS, sections, names, names_end, section_count);
+    const Shdr* text_section = FindElfSectionByName<ElfClass>(
+        ".text", SHT_PROGBITS, sections, names, names_end, section_count);
+    info->LoadedSection(".eh_frame");
+    const bool loaded = LoadDwarfCFI<ElfClass>(
+        obj_file, elf_header, ".eh_frame", eh_frame, true, got_section,
+        text_section, big_endian, smap, text_bias, usu, log);
+    if (loaded) {
+      any_cfi_loaded = true;
+      log("LoadSymbols: read CFI from .eh_frame");
+    }
+  }
+
+  SprintfLiteral(msg, "LoadSymbols: END %s\n", obj_file.c_str());
+  msg[sizeof(msg) - 1] = 0;
+  log(msg);
+
+  return any_cfi_loaded;
+}
+
+// Map the e_machine field of ELF_HEADER to the breakpad symbol file
+// architecture identifier.  Returns null for machine types not listed below.
+template <typename ElfClass>
+const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
+  using Half = typename ElfClass::Half;
+
+  struct MachineName {
+    Half machine;
+    const char* name;
+  };
+  static const MachineName kKnownMachines[] = {
+      {EM_386, "x86"},         {EM_ARM, "arm"},     {EM_AARCH64, "arm64"},
+      {EM_MIPS, "mips"},       {EM_PPC64, "ppc64"}, {EM_PPC, "ppc"},
+      {EM_S390, "s390"},       {EM_SPARC, "sparc"}, {EM_SPARCV9, "sparcv9"},
+      {EM_X86_64, "x86_64"},
+  };
+
+  const Half arch = elf_header->e_machine;
+  for (const MachineName& entry : kKnownMachines) {
+    if (entry.machine == arch) {
+      return entry.name;
+    }
+  }
+  return nullptr;
+}
+
+// Render the ELF file identifier in IDENTIFIER as UUID text with the dashes
+// stripped out and a trailing '0' appended.
+string FormatIdentifier(unsigned char identifier[16]) {
+  char uuid_text[40];
+  lul::FileID::ConvertIdentifierToString(identifier, uuid_text,
+                                         sizeof(uuid_text));
+
+  string result;
+  for (const char* p = uuid_text; *p != '\0'; ++p) {
+    if (*p == '-') {
+      continue;
+    }
+    result += *p;
+  }
+  // PDB files on Windows have an 'age' number appended to the end of the
+  // file identifier.  It isn't really used or necessary on other platforms,
+  // but append a "0" anyway to stay consistent.
+  result += '0';
+  return result;
+}
+
+// Return the non-directory portion of FILENAME: the portion after the
+// last slash, or the whole filename if there are no slashes.
+string BaseFileName(const string& filename) {
+  // basename() is handed a private, writable duplicate of the path, which
+  // costs an extra copy on top of the std::string we return.
+  char* scratch = strdup(filename.c_str());
+  string leaf(basename(scratch));
+  free(scratch);
+  return leaf;
+}
+
+// Read unwind information for the mapped ELF image |elf_header| (backed by
+// the file |obj_filename|) into |smap|.
+//
+// The image must yield a file identifier, have a recognised machine type and
+// a valid data encoding, otherwise false is returned.  If LoadSymbols() finds
+// nothing usable in the image itself and |info| names a debuglink file, that
+// file is mapped, checked for matching architecture and endianness, and
+// loaded instead.
+template <typename ElfClass>
+bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
+                            const string& obj_filename,
+                            const vector<string>& debug_dirs, SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  using Ehdr = typename ElfClass::Ehdr;
+
+  unsigned char identifier[16];
+  if (!lul::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
+    fprintf(stderr, "%s: unable to generate file identifier\n",
+            obj_filename.c_str());
+    return false;
+  }
+
+  const char* architecture = ElfArchitecture<ElfClass>(elf_header);
+  if (architecture == nullptr) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            obj_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  // Figure out what endianness this file is.
+  bool big_endian;
+  if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) {
+    return false;
+  }
+
+  // NOTE(review): these three values are computed but not read again in this
+  // function.
+  string name = BaseFileName(obj_filename);
+  string os = "Linux";
+  string id = FormatIdentifier(identifier);
+
+  LoadSymbolsInfo<ElfClass> info(debug_dirs);
+  const bool loaded_from_object =
+      LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
+                            !debug_dirs.empty(), &info, smap, rx_avma, rx_size,
+                            usu, log);
+  if (loaded_from_object) {
+    return true;
+  }
+
+  // Nothing usable in the object itself; fall back to the debuglink file,
+  // if one is known.
+  const string debuglink_file = info.debuglink_file();
+  if (debuglink_file.empty()) {
+    return false;
+  }
+
+  // Load debuglink ELF file.
+  fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
+  MmapWrapper debug_map_wrapper;
+  Ehdr* debug_elf_header = nullptr;
+  if (!LoadELF(debuglink_file, &debug_map_wrapper,
+               reinterpret_cast<void**>(&debug_elf_header))) {
+    return false;
+  }
+
+  // Sanity checks to make sure everything matches up.
+  const char* debug_architecture = ElfArchitecture<ElfClass>(debug_elf_header);
+  if (debug_architecture == nullptr) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            debuglink_file.c_str(), debug_elf_header->e_machine);
+    return false;
+  }
+  if (strcmp(architecture, debug_architecture) != 0) {
+    fprintf(stderr,
+            "%s with ELF machine architecture %s does not match "
+            "%s with ELF architecture %s\n",
+            debuglink_file.c_str(), debug_architecture, obj_filename.c_str(),
+            architecture);
+    return false;
+  }
+
+  bool debug_big_endian;
+  if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) {
+    return false;
+  }
+  if (debug_big_endian != big_endian) {
+    fprintf(stderr, "%s and %s does not match in endianness\n",
+            obj_filename.c_str(), debuglink_file.c_str());
+    return false;
+  }
+
+  return LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
+                               debug_elf_header, false, &info, smap, rx_avma,
+                               rx_size, usu, log);
+}
+
+} // namespace
+
+namespace lul {
+
+// Read unwind information from the ELF image mapped at |obj_file| (the
+// contents of the file |obj_filename|) into |smap|, dispatching on the
+// image's ELF class.  Returns false if the image lacks the ELF magic or its
+// class is neither ELFCLASS32 nor ELFCLASS64.
+bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename,
+                            const vector<string>& debug_dirs, SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  if (!IsValidElf(obj_file)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
+    return false;
+  }
+
+  switch (ElfClass(obj_file)) {
+    case ELFCLASS32:
+      return ReadSymbolDataElfClass<ElfClass32>(
+          reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename,
+          debug_dirs, smap, rx_avma, rx_size, usu, log);
+    case ELFCLASS64:
+      return ReadSymbolDataElfClass<ElfClass64>(
+          reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename,
+          debug_dirs, smap, rx_avma, rx_size, usu, log);
+    default:
+      return false;
+  }
+}
+
+// Map the ELF file |obj_file| and read its unwind information into |smap|.
+// Returns false if the file cannot be loaded as an ELF image; otherwise
+// returns whatever ReadSymbolDataInternal() reports for the mapped image.
+bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs,
+                    SecMap* smap, void* rx_avma, size_t rx_size,
+                    UniqueStringUniverse* usu, void (*log)(const char*)) {
+  MmapWrapper map_wrapper;
+  void* mapped_image = nullptr;
+  const bool mapped = LoadELF(obj_file, &map_wrapper, &mapped_image);
+  if (!mapped) {
+    return false;
+  }
+
+  const uint8_t* image_bytes = reinterpret_cast<uint8_t*>(mapped_image);
+  return ReadSymbolDataInternal(image_bytes, obj_file, debug_dirs, smap,
+                                rx_avma, rx_size, usu, log);
+}
+
+namespace {
+
+// Look up the section called |section_name| with type |section_type| in the
+// ELF image at |elf_base|.  If it exists and has a non-zero size,
+// |*section_start| and |*section_size| are set to its location in the image
+// and its size; otherwise both outputs are left untouched.
+template <typename ElfClass>
+void FindElfClassSection(const char* elf_base, const char* section_name,
+                         typename ElfClass::Word section_type,
+                         const void** section_start, int* section_size) {
+  using Ehdr = typename ElfClass::Ehdr;
+  using Shdr = typename ElfClass::Shdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  // Locate the section header table and the section-name string table.
+  const Shdr* section_table =
+      GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+  const Shdr* name_table = section_table + elf_header->e_shstrndx;
+  const char* names_begin =
+      GetOffset<ElfClass, char>(elf_header, name_table->sh_offset);
+  const char* names_end = names_begin + name_table->sh_size;
+
+  const Shdr* match = FindElfSectionByName<ElfClass>(
+      section_name, section_type, section_table, names_begin, names_end,
+      elf_header->e_shnum);
+  if (match == NULL || match->sh_size == 0) {
+    return;
+  }
+
+  *section_start = elf_base + match->sh_offset;
+  *section_size = match->sh_size;
+}
+
+// Find the first program header of type |segment_type| in the ELF image at
+// |elf_base|.  On a match, |*segment_start| and |*segment_size| are set to
+// the segment's location in the image and its file size; with no match both
+// outputs are left untouched.
+template <typename ElfClass>
+void FindElfClassSegment(const char* elf_base,
+                         typename ElfClass::Word segment_type,
+                         const void** segment_start, int* segment_size) {
+  using Ehdr = typename ElfClass::Ehdr;
+  using Phdr = typename ElfClass::Phdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  const Phdr* program_headers =
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+
+  int index = 0;
+  while (index < elf_header->e_phnum) {
+    const Phdr& candidate = program_headers[index];
+    if (candidate.p_type == segment_type) {
+      *segment_start = elf_base + candidate.p_offset;
+      *segment_size = candidate.p_filesz;
+      return;
+    }
+    ++index;
+  }
+}
+
+} // namespace
+
+// Return true if the bytes at |elf_base| begin with the ELF magic number.
+bool IsValidElf(const void* elf_base) {
+  const char* magic_bytes = reinterpret_cast<const char*>(elf_base);
+  const int difference = strncmp(magic_bytes, ELFMAG, SELFMAG);
+  return difference == 0;
+}
+
+// Return the EI_CLASS identification byte of the ELF image at |elf_base|.
+int ElfClass(const void* elf_base) {
+  typedef ElfW(Ehdr) NativeEhdr;
+  const NativeEhdr* header = reinterpret_cast<const NativeEhdr*>(elf_base);
+  const int elf_class = header->e_ident[EI_CLASS];
+  return elf_class;
+}
+
+// Find the section |section_name| of type |section_type| in the ELF image at
+// |elf_mapped_base|.  |*section_start| and |*section_size| are reset to
+// NULL/0 first and filled in when a non-empty matching section exists, in
+// which case true is returned.  If |elfclass| is non-null and the image has
+// the ELF magic, |*elfclass| receives the image's ELF class.
+bool FindElfSection(const void* elf_mapped_base, const char* section_name,
+                    uint32_t section_type, const void** section_start,
+                    int* section_size, int* elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  *section_start = NULL;
+  *section_size = 0;
+
+  if (!IsValidElf(elf_mapped_base)) {
+    return false;
+  }
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass != NULL) {
+    *elfclass = cls;
+  }
+
+  const char* elf_base = static_cast<const char*>(elf_mapped_base);
+
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    default:
+      return false;
+  }
+
+  // The helper only writes |*section_start| when it found the section.
+  return *section_start != NULL;
+}
+
+// Find the first segment of type |segment_type| in the ELF image at
+// |elf_mapped_base|.  |*segment_start| and |*segment_size| are reset to
+// NULL/0 first and filled in when a matching program header exists, in which
+// case true is returned.  If |elfclass| is non-null and the image has the
+// ELF magic, |*elfclass| receives the image's ELF class.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
+                    const void** segment_start, int* segment_size,
+                    int* elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  *segment_start = NULL;
+  *segment_size = 0;
+
+  if (!IsValidElf(elf_mapped_base)) {
+    return false;
+  }
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass != NULL) {
+    *elfclass = cls;
+  }
+
+  const char* elf_base = static_cast<const char*>(elf_mapped_base);
+
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSegment<ElfClass32>(elf_base, segment_type, segment_start,
+                                      segment_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start,
+                                      segment_size);
+      break;
+    default:
+      return false;
+  }
+
+  // The helper only writes |*segment_start| when it found the segment.
+  return *segment_start != NULL;
+}
+
+// (derived from)
+// file_id.cc: Return a unique identifier for a file
+//
+// See file_id.h for documentation
+//
+
+// ELF note name and desc are padded to 32-bit words: round |a| up to the
+// next multiple of 4.  The argument is parenthesised so that expressions
+// containing lower-precedence operators expand correctly.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+// These functions are also used inside the crashed process, so be safe
+// and use the syscall/libc wrappers instead of direct syscalls or libc.
+
+// Scan the |length| bytes of ELF note data at |section| for the first note
+// of type NT_GNU_BUILD_ID and copy as much of its descriptor as fits (at
+// most kMDGUIDSize bytes, zero-padded) into |identifier|.
+//
+// Returns true if such a note with a non-empty descriptor was found.
+// Returns false if there is no such note, if its descriptor is empty, or if
+// the note data is malformed (a header, name, or the bytes to be copied
+// would extend past the end of the section).
+template <typename ElfClass>
+static bool ElfClassBuildIDNoteIdentifier(const void* section, int length,
+                                          uint8_t identifier[kMDGUIDSize]) {
+  typedef typename ElfClass::Nhdr Nhdr;
+
+  if (section == NULL || length <= 0) {
+    return false;
+  }
+
+  const char* cursor = reinterpret_cast<const char*>(section);
+  uint64_t remaining = static_cast<uint64_t>(length);
+
+  // Only dereference a note header when all of it lies inside the section.
+  while (remaining >= sizeof(Nhdr)) {
+    const Nhdr* note_header = reinterpret_cast<const Nhdr*>(cursor);
+    const uint64_t payload_room = remaining - sizeof(Nhdr);
+
+    // Sizes are widened to 64 bits so that padding cannot wrap around.
+    const uint64_t name_size = note_header->n_namesz;
+    const uint64_t desc_size = note_header->n_descsz;
+    const uint64_t padded_name = NOTE_PADDING(name_size);
+    if (padded_name > payload_room) {
+      return false;  // The name runs past the end of the section.
+    }
+    const uint64_t desc_room = payload_room - padded_name;
+
+    if (note_header->n_type == NT_GNU_BUILD_ID) {
+      // Copy as many bytes of the build ID as will fit into the GUID space,
+      // but never read beyond the section.
+      const uint64_t copy_size =
+          std::min(static_cast<uint64_t>(kMDGUIDSize), desc_size);
+      if (desc_size == 0 || copy_size > desc_room) {
+        return false;
+      }
+      const char* build_id = cursor + sizeof(Nhdr) + padded_name;
+      memset(identifier, 0, kMDGUIDSize);
+      memcpy(identifier, build_id, static_cast<size_t>(copy_size));
+      return true;
+    }
+
+    // Not the note we want: step over its name and descriptor.
+    const uint64_t padded_desc = NOTE_PADDING(desc_size);
+    if (padded_desc > desc_room) {
+      return false;  // The next note would start past the end.
+    }
+    const uint64_t note_size = sizeof(Nhdr) + padded_name + padded_desc;
+    cursor += note_size;
+    remaining -= note_size;
+  }
+
+  return false;
+}
+
+// Locate ELF note data in the image at |elf_mapped_base| (first the PT_NOTE
+// segment, and if that is missing or empty the .note.gnu.build-id section)
+// and copy as many bytes of the build ID as will fit into |identifier|.
+static bool FindElfBuildIDNote(const void* elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* note_data;
+  int note_size, elfclass;
+
+  bool have_notes = FindElfSegment(elf_mapped_base, PT_NOTE, &note_data,
+                                   &note_size, &elfclass) &&
+                    note_size != 0;
+  if (!have_notes) {
+    have_notes =
+        FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                       &note_data, &note_size, &elfclass) &&
+        note_size != 0;
+  }
+  if (!have_notes) {
+    return false;
+  }
+
+  switch (elfclass) {
+    case ELFCLASS32:
+      return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_data, note_size,
+                                                       identifier);
+    case ELFCLASS64:
+      return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_data, note_size,
+                                                       identifier);
+    default:
+      return false;
+  }
+}
+
+// Locate the .text section of the ELF image at |elf_mapped_base| and build a
+// simple hash by XORing successive kMDGUIDSize-byte chunks from (at most)
+// its first 4096 bytes into |identifier|.  Returns false if there is no
+// non-empty .text section.
+//
+// NOTE(review): every chunk reads a full kMDGUIDSize bytes, so when the
+// hashed length is not a multiple of kMDGUIDSize the last chunk reads bytes
+// beyond that length.
+static bool HashElfTextSection(const void* elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* text_section;
+  int text_size;
+  const bool found = FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
+                                    &text_section, &text_size, NULL);
+  if (!found || text_size == 0) {
+    return false;
+  }
+
+  memset(identifier, 0, kMDGUIDSize);
+  const uint8_t* text_bytes = reinterpret_cast<const uint8_t*>(text_section);
+  const int hashed_length = std::min(text_size, 4096);
+  const int chunk_size = static_cast<int>(kMDGUIDSize);
+  for (int offset = 0; offset < hashed_length; offset += chunk_size) {
+    const uint8_t* chunk = text_bytes + offset;
+    for (unsigned i = 0; i < kMDGUIDSize; i++) {
+      identifier[i] ^= chunk[i];
+    }
+  }
+  return true;
+}
+
+// static
+// Fill |identifier| for the ELF image mapped at |base|: prefer the build ID
+// note, and fall back on hashing the start of the text section when no
+// usable note is found.  Returns false if both approaches fail.
+bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
+                                             uint8_t identifier[kMDGUIDSize]) {
+  return FindElfBuildIDNote(base, identifier) ||
+         HashElfTextSection(base, identifier);
+}
+
+// static
+// Format |identifier| as upper-case UUID text (e.g.
+// 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE) into |buffer|, which holds
+// |buffer_length| bytes.  The result is always NUL-terminated within the
+// buffer; if the buffer is too small for all the text plus the terminator,
+// the text is truncated.  Nothing is written when |buffer| is null or
+// |buffer_length| is not positive.
+void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                       char* buffer, int buffer_length) {
+  if (buffer == NULL || buffer_length <= 0) {
+    return;
+  }
+
+  uint8_t identifier_swapped[kMDGUIDSize];
+
+  // Endian-ness swap to match dump processor expectation.  The fields are
+  // moved through memcpy() so that no alignment is assumed for the array.
+  memcpy(identifier_swapped, identifier, kMDGUIDSize);
+  uint32_t data1;
+  memcpy(&data1, identifier_swapped, sizeof(data1));
+  data1 = htonl(data1);
+  memcpy(identifier_swapped, &data1, sizeof(data1));
+  uint16_t data2;
+  memcpy(&data2, identifier_swapped + 4, sizeof(data2));
+  data2 = htons(data2);
+  memcpy(identifier_swapped + 4, &data2, sizeof(data2));
+  uint16_t data3;
+  memcpy(&data3, identifier_swapped + 6, sizeof(data3));
+  data3 = htons(data3);
+  memcpy(identifier_swapped + 6, &data3, sizeof(data3));
+
+  static const char kHexDigits[] = "0123456789ABCDEF";
+
+  // Keep the last byte of the buffer free for the terminator.
+  const int capacity = buffer_length - 1;
+  int buffer_idx = 0;
+  for (unsigned int idx = 0; (buffer_idx < capacity) && (idx < kMDGUIDSize);
+       ++idx) {
+    // Gather this byte's characters first, then copy only those that fit.
+    char chunk[3];
+    int chunk_len = 0;
+    if (idx == 4 || idx == 6 || idx == 8 || idx == 10) {
+      chunk[chunk_len++] = '-';
+    }
+    chunk[chunk_len++] = kHexDigits[(identifier_swapped[idx] >> 4) & 0x0F];
+    chunk[chunk_len++] = kHexDigits[identifier_swapped[idx] & 0x0F];
+
+    for (int i = 0; i < chunk_len && buffer_idx < capacity; ++i) {
+      buffer[buffer_idx++] = chunk[i];
+    }
+  }
+
+  // NULL terminate
+  buffer[buffer_idx] = 0;
+}
+
+} // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulElfExt.h b/mozglue/baseprofiler/lul/LulElfExt.h
new file mode 100644
index 0000000000..73d9ff7f15
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulElfExt.h
@@ -0,0 +1,69 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/linux/dump_symbols.h
+
+#ifndef LulElfExt_h
+#define LulElfExt_h
+
+// These two functions are the external interface to the
+// ELF/Dwarf/EXIDX reader.
+
+#include "LulMainInt.h"
+
+using lul::SecMap;
+
+namespace lul {
+
+class UniqueStringUniverse;
+
+// Find all the unwind information in OBJ_FILE, an ELF executable
+// or shared library, and add it to SMAP.
+bool ReadSymbolData(const std::string& obj_file,
+ const std::vector<std::string>& debug_dirs, SecMap* smap,
+ void* rx_avma, size_t rx_size, UniqueStringUniverse* usu,
+ void (*log)(const char*));
+
+// The same as ReadSymbolData, except that OBJ_FILE is assumed to
+// point to a mapped-in image of OBJ_FILENAME.
+bool ReadSymbolDataInternal(const uint8_t* obj_file,
+ const std::string& obj_filename,
+ const std::vector<std::string>& debug_dirs,
+ SecMap* smap, void* rx_avma, size_t rx_size,
+ UniqueStringUniverse* usu,
+ void (*log)(const char*));
+
+} // namespace lul
+
+#endif // LulElfExt_h
diff --git a/mozglue/baseprofiler/lul/LulElfInt.h b/mozglue/baseprofiler/lul/LulElfInt.h
new file mode 100644
index 0000000000..31ffba8ff0
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulElfInt.h
@@ -0,0 +1,218 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2012, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+// src/common/android/include/elf.h
+// src/common/linux/elfutils.h
+// src/common/linux/file_id.h
+// src/common/linux/elfutils-inl.h
+
+#ifndef LulElfInt_h
+#define LulElfInt_h
+
+// This header defines functions etc internal to the ELF reader. It
+// should not be included outside of LulElf.cpp.
+
+#include <elf.h>
+#include <stdlib.h>
+
+#include "mozilla/Assertions.h"
+
+#include "PlatformMacros.h"
+
+// (derived from)
+// elfutils.h: Utilities for dealing with ELF files.
+//
+#include <link.h>
+
+#if defined(GP_OS_android)
+
+// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h
+// The Android headers don't always define this constant.
+# ifndef EM_X86_64
+# define EM_X86_64 62
+# endif
+
+# ifndef EM_PPC64
+# define EM_PPC64 21
+# endif
+
+# ifndef EM_S390
+# define EM_S390 22
+# endif
+
+# ifndef NT_GNU_BUILD_ID
+# define NT_GNU_BUILD_ID 3
+# endif
+
+# ifndef ElfW
+# define ElfW(type) _ElfW(Elf, ELFSIZE, type)
+# define _ElfW(e, w, t) _ElfW_1(e, w, _##t)
+# define _ElfW_1(e, w, t) e##w##t
+# endif
+
+#endif
+
+#if defined(GP_OS_freebsd)
+
+# ifndef ElfW
+# define ElfW(type) Elf_##type
+# endif
+
+#endif
+
+namespace lul {
+
+// Traits classes so consumers can write templatized code to deal
+// with specific ELF bits.  This one names the 32-bit ELF types.
+struct ElfClass32 {
+  using Addr = Elf32_Addr;
+  using Ehdr = Elf32_Ehdr;
+  using Nhdr = Elf32_Nhdr;
+  using Phdr = Elf32_Phdr;
+  using Shdr = Elf32_Shdr;
+  using Half = Elf32_Half;
+  using Off = Elf32_Off;
+  using Word = Elf32_Word;
+  // Value expected in e_ident[EI_CLASS] for this class of file.
+  static const int kClass = ELFCLASS32;
+  // Size in bytes of an address in this class of file.
+  static const size_t kAddrSize = sizeof(Elf32_Addr);
+};
+
+// Traits class naming the 64-bit ELF types, for use by templatized code.
+struct ElfClass64 {
+  using Addr = Elf64_Addr;
+  using Ehdr = Elf64_Ehdr;
+  using Nhdr = Elf64_Nhdr;
+  using Phdr = Elf64_Phdr;
+  using Shdr = Elf64_Shdr;
+  using Half = Elf64_Half;
+  using Off = Elf64_Off;
+  using Word = Elf64_Word;
+  // Value expected in e_ident[EI_CLASS] for this class of file.
+  static const int kClass = ELFCLASS64;
+  // Size in bytes of an address in this class of file.
+  static const size_t kAddrSize = sizeof(Elf64_Addr);
+};
+
+bool IsValidElf(const void* elf_header);
+int ElfClass(const void* elf_base);
+
+// Attempt to find a section named |section_name| of type |section_type|
+// in the ELF binary data at |elf_mapped_base|. On success, returns true
+// and sets |*section_start| to point to the start of the section data,
+// and |*section_size| to the size of the section's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSection(const void* elf_mapped_base, const char* section_name,
+ uint32_t section_type, const void** section_start,
+ int* section_size, int* elfclass);
+
+// Internal helper method, exposed for convenience for callers
+// that already have more info.
+template <typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+ const char* name, typename ElfClass::Word section_type,
+ const typename ElfClass::Shdr* sections, const char* section_names,
+ const char* names_end, int nsection);
+
+// Attempt to find the first segment of type |segment_type| in the ELF
+// binary data at |elf_mapped_base|. On success, returns true and sets
+// |*segment_start| to point to the start of the segment data, and
+// |*segment_size| to the size of the segment's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
+ const void** segment_start, int* segment_size,
+ int* elfclass);
+
+// Convert an offset from an Elf header into a pointer to the mapped
+// address in the current process. Takes an extra template parameter
+// to specify the return type, so that callers do not have to cast the
+// result themselves.
+template <typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+ typename ElfClass::Off offset);
+
+// (derived from)
+// file_id.h: Return a unique identifier for a file
+//
+
+static const size_t kMDGUIDSize = sizeof(MDGUID);
+
+// Static helpers for obtaining a file identifier from a mapped ELF image and
+// for rendering such an identifier as text.
+class FileID {
+ public:
+ // Load the identifier for the elf file mapped into memory at |base| into
+ // |identifier|. Return false if the identifier could not be created for the
+ // file.
+ static bool ElfFileIdentifierFromMappedFile(const void* base,
+ uint8_t identifier[kMDGUIDSize]);
+
+ // Convert the |identifier| data to a NULL terminated string. The string will
+ // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE).
+ // The |buffer| should be at least 37 bytes long to receive all of the data
+ // and termination. Shorter buffers will contain truncated data.
+ // |buffer_length| is the size of |buffer| in bytes.
+ static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+ char* buffer, int buffer_length);
+};
+
+// Turn a file offset relative to |elf_header| into a pointer of type
+// |const T*| into the mapped image.  No bounds checking is performed.
+template <typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                   typename ElfClass::Off offset) {
+  const uintptr_t image_base = reinterpret_cast<uintptr_t>(elf_header);
+  const uintptr_t target = image_base + offset;
+  return reinterpret_cast<const T*>(target);
+}
+
+// Search the |nsection| section headers at |sections| for one of type
+// |section_type| whose name, looked up in the string table spanning
+// [|section_names|, |names_end|), equals |name|.  Returns the first matching
+// header, or NULL if |name| is empty or nothing matches.
+template <typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+    const char* name, typename ElfClass::Word section_type,
+    const typename ElfClass::Shdr* sections, const char* section_names,
+    const char* names_end, int nsection) {
+  MOZ_ASSERT(name != NULL);
+  MOZ_ASSERT(sections != NULL);
+  MOZ_ASSERT(nsection > 0);
+
+  int name_len = strlen(name);
+  if (name_len == 0) {
+    return NULL;
+  }
+
+  for (int i = 0; i < nsection; ++i) {
+    const typename ElfClass::Shdr* candidate = sections + i;
+    if (candidate->sh_type != section_type) {
+      continue;
+    }
+    const char* candidate_name = section_names + candidate->sh_name;
+    // Skip names that could not hold |name| plus its terminator before the
+    // end of the string table; only then is the comparison attempted.
+    if (names_end - candidate_name < name_len + 1) {
+      continue;
+    }
+    if (strcmp(name, candidate_name) == 0) {
+      return candidate;
+    }
+  }
+  return NULL;
+}
+
+} // namespace lul
+
+// And finally, the external interface, offered to LulMain.cpp
+#include "LulElfExt.h"
+
+#endif // LulElfInt_h
diff --git a/mozglue/baseprofiler/lul/LulMain.cpp b/mozglue/baseprofiler/lul/LulMain.cpp
new file mode 100644
index 0000000000..75f205ac3a
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMain.cpp
@@ -0,0 +1,1958 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulMain.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h> // write(), only for testing LUL
+
+#include <algorithm> // std::sort
+#include <string>
+#include <utility>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+
+#include "BaseProfiler.h"
+#include "LulCommonExt.h"
+#include "LulElfExt.h"
+#include "LulMainInt.h"
+
+using mozilla::baseprofiler::profiler_current_process_id;
+using mozilla::baseprofiler::profiler_current_thread_id;
+
+// Set this to 1 for verbose logging
+#define DEBUG_MAIN 0
+
+namespace lul {
+
+using mozilla::CheckedInt;
+using mozilla::DebugOnly;
+using mozilla::MallocSizeOf;
+using mozilla::Unused;
+using std::pair;
+using std::string;
+using std::vector;
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
+// Any such function -- and, hence, the transitive closure of those
+// reachable from it -- must not do any dynamic memory allocation.
+// Doing so risks deadlock. There is exactly one root function for
+// the transitive closure: Lul::Unwind.
+//
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+
+////////////////////////////////////////////////////////////////
+// RuleSet //
+////////////////////////////////////////////////////////////////
+
+// Maps the register number |aReg| to a short printable name, as used by
+// LExpr::ShowRule.  Besides DW_REG_CFA, only the registers belonging to the
+// architecture being compiled for are recognised; any other value yields
+// "???".  Building for an architecture not listed here is a compile error.
+static const char* NameOf_DW_REG(int16_t aReg) {
+ switch (aReg) {
+ case DW_REG_CFA:
+ return "cfa";
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ case DW_REG_INTEL_XBP:
+ return "xbp";
+ case DW_REG_INTEL_XSP:
+ return "xsp";
+ case DW_REG_INTEL_XIP:
+ return "xip";
+#elif defined(GP_ARCH_arm)
+ case DW_REG_ARM_R7:
+ return "r7";
+ case DW_REG_ARM_R11:
+ return "r11";
+ case DW_REG_ARM_R12:
+ return "r12";
+ case DW_REG_ARM_R13:
+ return "r13";
+ case DW_REG_ARM_R14:
+ return "r14";
+ case DW_REG_ARM_R15:
+ return "r15";
+#elif defined(GP_ARCH_arm64)
+ case DW_REG_AARCH64_X29:
+ return "x29";
+ case DW_REG_AARCH64_X30:
+ return "x30";
+ case DW_REG_AARCH64_SP:
+ return "sp";
+#elif defined(GP_ARCH_mips64)
+ case DW_REG_MIPS_SP:
+ return "sp";
+ case DW_REG_MIPS_FP:
+ return "fp";
+ case DW_REG_MIPS_PC:
+ return "pc";
+#else
+# error "Unsupported arch"
+#endif
+ default:
+ return "???";
+ }
+}
+
+// Builds a printable description of this expression, of the form
+// "<aNewReg>=<recipe>", where the recipe depends on |mHow|.
+string LExpr::ShowRule(const char* aNewReg) const {
+  string shown(aNewReg);
+  shown += "=";
+
+  // Scratch space for the variants that need formatting.
+  char buf[64];
+  if (mHow == UNKNOWN) {
+    shown += "Unknown";
+  } else if (mHow == NODEREF) {
+    // Register plus offset, used directly.
+    SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
+    shown += buf;
+  } else if (mHow == DEREF) {
+    // Register plus offset, used as an address to load from.
+    SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
+    shown += buf;
+  } else if (mHow == PFXEXPR) {
+    // |mOffset| is shown as the position of the prefix expression.
+    SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
+    shown += buf;
+  } else {
+    shown += "???";
+  }
+  return shown;
+}
+
+// Formats this RuleSet -- its address range, the CFA expression and the
+// recovery expression of every tracked register for the target architecture
+// -- into a single string and hands it to |aLog|.
+void RuleSet::Print(void (*aLog)(const char*)) const {
+  char header[96];
+  const unsigned long long int firstAddr = (unsigned long long int)mAddr;
+  const unsigned long long int lastAddr =
+      (unsigned long long int)(mAddr + mLen - 1);
+  SprintfLiteral(header, "[%llx .. %llx]: let ", firstAddr, lastAddr);
+
+  string line(header);
+  line += mCfaExpr.ShowRule("cfa");
+  line += " in";
+  // One recovery expression per register we care about.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  line += mXipExpr.ShowRule(" RA");
+  line += mXspExpr.ShowRule(" SP");
+  line += mXbpExpr.ShowRule(" BP");
+#elif defined(GP_ARCH_arm)
+  line += mR15expr.ShowRule(" R15");
+  line += mR7expr.ShowRule(" R7");
+  line += mR11expr.ShowRule(" R11");
+  line += mR12expr.ShowRule(" R12");
+  line += mR13expr.ShowRule(" R13");
+  line += mR14expr.ShowRule(" R14");
+#elif defined(GP_ARCH_arm64)
+  line += mX29expr.ShowRule(" X29");
+  line += mX30expr.ShowRule(" X30");
+  line += mSPexpr.ShowRule(" SP");
+#elif defined(GP_ARCH_mips64)
+  line += mPCexpr.ShowRule(" PC");
+  line += mSPexpr.ShowRule(" SP");
+  line += mFPexpr.ShowRule(" FP");
+#else
+#  error "Unsupported arch"
+#endif
+  aLog(line.c_str());
+}
+
+// Returns a pointer to the member expression that describes how to recover
+// register |aRegno| (or the CFA, for DW_REG_CFA).  Only the registers of the
+// architecture being compiled for are handled; for any other register number
+// the result is nullptr.
+LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
+ switch (aRegno) {
+ case DW_REG_CFA:
+ return &mCfaExpr;
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ case DW_REG_INTEL_XIP:
+ return &mXipExpr;
+ case DW_REG_INTEL_XSP:
+ return &mXspExpr;
+ case DW_REG_INTEL_XBP:
+ return &mXbpExpr;
+#elif defined(GP_ARCH_arm)
+ case DW_REG_ARM_R15:
+ return &mR15expr;
+ case DW_REG_ARM_R14:
+ return &mR14expr;
+ case DW_REG_ARM_R13:
+ return &mR13expr;
+ case DW_REG_ARM_R12:
+ return &mR12expr;
+ case DW_REG_ARM_R11:
+ return &mR11expr;
+ case DW_REG_ARM_R7:
+ return &mR7expr;
+#elif defined(GP_ARCH_arm64)
+ case DW_REG_AARCH64_X29:
+ return &mX29expr;
+ case DW_REG_AARCH64_X30:
+ return &mX30expr;
+ case DW_REG_AARCH64_SP:
+ return &mSPexpr;
+#elif defined(GP_ARCH_mips64)
+ case DW_REG_MIPS_SP:
+ return &mSPexpr;
+ case DW_REG_MIPS_FP:
+ return &mFPexpr;
+ case DW_REG_MIPS_PC:
+ return &mPCexpr;
+#else
+# error "Unknown arch"
+#endif
+ default:
+ return nullptr;
+ }
+}
+
+// Creates a RuleSet covering no addresses: both the start address and the
+// length are zero.  The remaining fields are all of type LExpr and are set
+// up by LExpr::LExpr().
+RuleSet::RuleSet() : mAddr(0), mLen(0) {}
+
+////////////////////////////////////////////////////////////////
+// SecMap //
+////////////////////////////////////////////////////////////////
+
+// See header file LulMainInt.h for comments about invariants.
+
+// Creates an empty map that logs through |aLog|.  The summary range starts
+// out as min = 1, max = 0 -- the same pair PrepareRuleSets stores when no
+// RuleSets remain -- and the map is initially marked usable.
+SecMap::SecMap(void (*aLog)(const char*))
+ : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {}
+
+// Empties the RuleSet vector explicitly before the members are destroyed.
+SecMap::~SecMap() { mRuleSets.clear(); }
+
+// RUNS IN NO-MALLOC CONTEXT
+// Returns the RuleSet whose address range contains |ia|, or nullptr if there
+// is none.  The map must have been prepared (sorted and preened) first.
+RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
+  // Can't do this until the array has been sorted and preened.
+  MOZ_ASSERT(mUsable);
+
+  // Binary search over mRuleSets.  The bounds are signed so that |upper| can
+  // drop to -1; that is also what makes an empty vector work, since the loop
+  // is then never entered.
+  long int lower = 0;
+  long int upper = (long int)mRuleSets.size() - 1;
+
+  // Invariant: the unsearched space is lower .. upper, inclusive.
+  while (lower <= upper) {
+    const long int probe = lower + ((upper - lower) / 2);
+    RuleSet* candidate = &mRuleSets[probe];
+    const uintptr_t firstAddr = candidate->mAddr;
+    const uintptr_t lastAddr = firstAddr + candidate->mLen - 1;
+    if (ia < firstAddr) {
+      upper = probe - 1;
+    } else if (ia > lastAddr) {
+      lower = probe + 1;
+    } else {
+      MOZ_ASSERT(firstAddr <= ia && ia <= lastAddr);
+      return candidate;
+    }
+  }
+
+  // Search space exhausted: not found.
+  return nullptr;
+}
+
+// Appends a copy of |*rs| to the collection.  Since the new entry lands at
+// the end regardless of its address, the map is marked as not searchable.
+void SecMap::AddRuleSet(const RuleSet* rs) {
+  const RuleSet& incoming = *rs;
+  mUsable = false;
+  mRuleSets.push_back(incoming);
+}
+
+// Appends |pfxi| to the vector of prefix instructions and returns the index
+// at which it was stored.  Calling this marks the map as not searchable.
+uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
+  mUsable = false;
+  // The new element's index is the vector's size before the append.
+  const uint32_t newIndex = mPfxInstrs.size();
+  mPfxInstrs.push_back(pfxi);
+  return newIndex;
+}
+
+// Ordering predicate passed to std::sort by PrepareRuleSets: orders RuleSets
+// by ascending start address.  Despite the "LE" in the name the comparison is
+// a strict less-than.
+static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
+ return rs1.mAddr < rs2.mAddr;
+}
+
+// Prepare the map for searching.  The RuleSets are sorted by start address;
+// any that do not fall entirely inside the range [aStart, +aLen) are
+// removed; overlapping entries are truncated so that they no longer overlap;
+// zero-length entries are removed; and the summary min/max addresses are
+// recomputed.  On normal completion the map is marked usable for
+// FindRuleSet.  If there are no RuleSets at all, nothing is changed.
+void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
+  if (mRuleSets.empty()) {
+    return;
+  }
+
+  MOZ_ASSERT(aLen > 0);
+  if (aLen == 0) {
+    // This should never happen.
+    mRuleSets.clear();
+    return;
+  }
+
+  // Sort by start addresses.
+  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
+
+  // Detect any entry not completely contained within [start, +len).
+  // Set its length to zero, so that the next pass will remove it.
+  // The test is phrased in terms of offsets from |aStart| so that neither
+  // |mAddr + mLen| nor |aStart + aLen| is ever computed; either sum could
+  // wrap around and make an out-of-range entry look acceptable.
+  for (size_t i = 0; i < mRuleSets.size(); ++i) {
+    RuleSet* rs = &mRuleSets[i];
+    if (rs->mLen == 0) {
+      continue;
+    }
+    bool contained = false;
+    if (rs->mAddr >= aStart) {
+      const uintptr_t offset = rs->mAddr - aStart;
+      contained = offset <= aLen && rs->mLen <= aLen - offset;
+    }
+    if (!contained) {
+      rs->mLen = 0;
+    }
+  }
+
+  // Iteratively truncate any overlaps and remove any zero length
+  // entries that might result, or that may have been present
+  // initially.  Unless the input is seriously screwy, this is
+  // expected to iterate only once.
+  while (true) {
+    size_t i;
+    size_t n = mRuleSets.size();
+    size_t nZeroLen = 0;
+
+    if (n == 0) {
+      break;
+    }
+
+    for (i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i - 1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr <= here->mAddr);
+      // |prev| overlaps |here| iff it extends past |here|'s start.  The
+      // entries are sorted, so the subtraction cannot underflow.
+      const uintptr_t gap = here->mAddr - prev->mAddr;
+      if (prev->mLen > gap) {
+        prev->mLen = gap;
+      }
+      if (prev->mLen == 0) nZeroLen++;
+    }
+
+    if (mRuleSets[n - 1].mLen == 0) {
+      nZeroLen++;
+    }
+
+    // At this point, the entries are in-order and non-overlapping.
+    // If none of them are zero-length, we are done.
+    if (nZeroLen == 0) {
+      break;
+    }
+
+    // Slide back the entries to remove the zero length ones.
+    size_t j = 0;  // The write-point.
+    for (i = 0; i < n; ++i) {
+      if (mRuleSets[i].mLen == 0) {
+        continue;
+      }
+      if (j != i) mRuleSets[j] = mRuleSets[i];
+      ++j;
+    }
+    MOZ_ASSERT(i == n);
+    MOZ_ASSERT(nZeroLen <= n);
+    MOZ_ASSERT(j == n - nZeroLen);
+
+    // Drop the now-unused tail in one go.
+    mRuleSets.erase(mRuleSets.begin() + j, mRuleSets.end());
+
+    MOZ_ASSERT(mRuleSets.size() == j);
+  }
+
+  size_t n = mRuleSets.size();
+
+#ifdef DEBUG
+  // Do a final check on the rules: their address ranges must be
+  // ascending, non overlapping, non zero sized.
+  if (n > 0) {
+    MOZ_ASSERT(mRuleSets[0].mLen > 0);
+    for (size_t i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i - 1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr < here->mAddr);
+      MOZ_ASSERT(here->mLen > 0);
+      MOZ_ASSERT(here->mAddr - prev->mAddr >= prev->mLen);
+    }
+  }
+#endif
+
+  // Set the summary min and max address values.
+  if (n == 0) {
+    // Use the values defined in comments in the class declaration.
+    mSummaryMinAddr = 1;
+    mSummaryMaxAddr = 0;
+  } else {
+    mSummaryMinAddr = mRuleSets[0].mAddr;
+    mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
+  }
+  char buf[150];
+  SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
+                 (int)n, (unsigned long long int)mSummaryMinAddr,
+                 (unsigned long long int)mSummaryMaxAddr);
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Is now usable for binary search.
+  mUsable = true;
+
+#if 0
+  mLog("\nRulesets after preening\n");
+  for (size_t i = 0; i < mRuleSets.size(); ++i) {
+    mRuleSets[i].Print(mLog);
+    mLog("\n");
+  }
+  mLog("\n");
+#endif
+}
+
+// Returns true iff the map currently holds no RuleSets.
+bool SecMap::IsEmpty() { return mRuleSets.empty(); }
+
+// Reports the memory attributed to this SecMap: the object itself plus the
+// backing stores of its two vectors, each measured with |aMallocSizeOf|.
+size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  size_t total = aMallocSizeOf(this);
+
+  // Passing a vector's data() pointer to the measuring function might not be
+  // safe with every std::vector implementation, but it seems to be working
+  // for now...
+  const void* const ruleSetStore = mRuleSets.data();
+  total += aMallocSizeOf(ruleSetStore);
+  const void* const pfxInstrStore = mPfxInstrs.data();
+  total += aMallocSizeOf(pfxInstrStore);
+
+  return total;
+}
+
+////////////////////////////////////////////////////////////////
+// SegArray //
+////////////////////////////////////////////////////////////////
+
+// A SegArray holds a set of address ranges that together exactly
+// cover an address range, with no overlaps or holes.  Each range has
+// an associated value, which in this case has been specialised to be
+// a simple boolean.  The representation is kept to minimal canonical
+// form in which adjacent ranges with the same associated value are
+// merged together.  Each range is represented by a |struct Seg|.
+//
+// SegArrays are used to keep track of which parts of the address
+// space are known to contain instructions.
+//
+// Invariant: |mSegs| is never empty, is sorted by address, and its
+// ranges cover [0, UINTPTR_MAX] exactly.  |find| relies on this.
+class SegArray {
+ public:
+  // Associates |val| with every address in [lo, hi] (both inclusive),
+  // overriding whatever was recorded for those addresses before.  Does
+  // nothing if |lo| > |hi|.
+  void add(uintptr_t lo, uintptr_t hi, bool val) {
+    if (lo > hi) {
+      return;
+    }
+    // Make sure segment boundaries exist exactly at |lo| and just after
+    // |hi|, so that whole segments can be updated below.
+    split_at(lo);
+    if (hi < UINTPTR_MAX) {
+      // Guarded, since |hi + 1| would wrap around to zero otherwise.
+      split_at(hi + 1);
+    }
+    const std::vector<Seg>::size_type iLo = find(lo);
+    const std::vector<Seg>::size_type iHi = find(hi);
+    for (std::vector<Seg>::size_type i = iLo; i <= iHi; ++i) {
+      mSegs[i].val = val;
+    }
+    // Restore canonical form.
+    preen();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // If the segment containing |addr| has the value |true|, stores that
+  // segment's bounds (inclusive) in |*rx_min| and |*rx_max| and returns
+  // true.  Otherwise returns false and leaves the outputs untouched.
+  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
+                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
+    const std::vector<Seg>::size_type i = find(addr);
+    if (!mSegs[i].val) {
+      return false;
+    }
+    *rx_min = mSegs[i].lo;
+    *rx_max = mSegs[i].hi;
+    return true;
+  }
+
+  // Starts out with one segment covering every address, with value false.
+  SegArray() {
+    Seg s(0, UINTPTR_MAX, false);
+    mSegs.push_back(s);
+  }
+
+ private:
+  // One address range [lo, hi], both ends inclusive, and its value.
+  struct Seg {
+    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
+    uintptr_t lo;
+    uintptr_t hi;
+    bool val;
+  };
+
+  // Merges adjacent segments that carry the same value.
+  void preen() {
+    // Index-based, so that no iterator is ever moved before begin() and
+    // none is used after erase() has invalidated it.
+    std::vector<Seg>::size_type i = 0;
+    while (i + 1 < mSegs.size()) {
+      if (mSegs[i].val != mSegs[i + 1].val) {
+        ++i;
+        continue;
+      }
+      mSegs[i].hi = mSegs[i + 1].hi;
+      mSegs.erase(mSegs.begin() + i + 1);
+      // Stay on |i|, so as not to miss an opportunity to merge with the
+      // entry that has just moved up next to it.
+    }
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Returns the index of the segment containing |a|, found by binary
+  // search.
+  std::vector<Seg>::size_type find(uintptr_t a) {
+    long int lo = 0;
+    long int hi = (long int)mSegs.size() - 1;
+    while (true) {
+      // The unsearched space is lo .. hi inclusive.
+      if (lo > hi) {
+        // Not found.  This can't happen, given the class invariant.
+        return (std::vector<Seg>::size_type)(-1);
+      }
+      long int mid = lo + ((hi - lo) / 2);
+      uintptr_t mid_lo = mSegs[mid].lo;
+      uintptr_t mid_hi = mSegs[mid].hi;
+      if (a < mid_lo) {
+        hi = mid - 1;
+        continue;
+      }
+      if (a > mid_hi) {
+        lo = mid + 1;
+        continue;
+      }
+      return (std::vector<Seg>::size_type)mid;
+    }
+  }
+
+  // Ensures that some segment starts exactly at |a|, by splitting the
+  // segment that contains |a| in two if necessary.
+  void split_at(uintptr_t a) {
+    const std::vector<Seg>::size_type i = find(a);
+    if (mSegs[i].lo == a) {
+      return;
+    }
+    // Duplicate segment |i|, then trim the two copies so that they meet
+    // at |a|.  |a| is greater than |mSegs[i].lo| here, so |a - 1| cannot
+    // wrap around.
+    const Seg original = mSegs[i];
+    mSegs.insert(mSegs.begin() + i + 1, original);
+    mSegs[i].hi = a - 1;
+    mSegs[i + 1].lo = a;
+  }
+
+  // Debugging aid: prints every segment to stdout.
+  void show() {
+    printf("<< %d entries:\n", (int)mSegs.size());
+    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
+         ++iter) {
+      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
+             (unsigned long long int)(*iter).hi,
+             (*iter).val ? "true" : "false");
+    }
+    printf(">>\n");
+  }
+
+  std::vector<Seg> mSegs;
+};
+
+////////////////////////////////////////////////////////////////
+// PriMap //
+////////////////////////////////////////////////////////////////
+
+// The primary map: an address-ordered collection of SecMaps, one per mapped
+// object, searchable by instruction address.
+class PriMap {
+ public:
+  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Finds the SecMap covering |ia| and returns the RuleSet it has for |ia|
+  // together with that SecMap's prefix instructions.  If no SecMap covers
+  // |ia|, both halves of the pair are null; the RuleSet half can also be
+  // null on its own.
+  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
+    typedef pair<const RuleSet*, const vector<PfxInstr>*> Result;
+    SecMap* owner = FindSecMap(ia);
+    if (!owner) {
+      return Result(nullptr, nullptr);
+    }
+    return Result(owner->FindRuleSet(ia), owner->GetPfxInstrs());
+  }
+
+  // Add a secondary map.  No overlaps allowed w.r.t. existing
+  // secondary maps.
+  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
+    // An empty SecMap can't be added to the PriMap.  That's harmless,
+    // since nothing could ever be found in it anyway.
+    if (aSecMap->IsEmpty()) {
+      return;
+    }
+
+    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
+
+    // Walk the existing SecMaps to find the slot this one belongs in,
+    // checking along the way that each existing entry is well formed.
+    // FIXME: this gives a cost that is O(N^2) in the total number of
+    // shared objects in the system.  ToDo: better.
+    const size_t count = mSecMaps.size();
+    size_t slot = 0;
+    while (slot < count) {
+      const mozilla::UniquePtr<SecMap>& existing = mSecMaps[slot];
+      MOZ_ASSERT(existing->mSummaryMinAddr <= existing->mSummaryMaxAddr);
+      if (aSecMap->mSummaryMinAddr < existing->mSummaryMaxAddr) {
+        // |aSecMap| needs to go immediately before mSecMaps[slot].
+        break;
+      }
+      ++slot;
+    }
+    MOZ_ASSERT(slot <= count);
+
+    // When |slot| == |count| this inserts at end(), i.e. appends.
+    mSecMaps.insert(mSecMaps.begin() + slot, std::move(aSecMap));
+
+    char buf[100];
+    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
+                   (int)mSecMaps.size());
+    buf[sizeof(buf) - 1] = 0;
+    mLog(buf);
+  }
+
+  // Remove and delete any SecMaps in the mapping, that intersect
+  // with the specified address range.
+  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
+    MOZ_ASSERT(avma_min <= avma_max);
+    // Walk from the end towards the start, so that the special case where
+    // the range denotes the entire address space completes in time
+    // proportional to the number of elements in the map.
+    for (size_t i = mSecMaps.size(); i-- > 0;) {
+      const SecMap& candidate = *mSecMaps[i];
+      const bool disjoint = candidate.mSummaryMaxAddr < avma_min ||
+                            avma_max < candidate.mSummaryMinAddr;
+      if (disjoint) {
+        continue;
+      }
+      // Erasing slides everything above |i| down to cover the hole.
+      mSecMaps.erase(mSecMaps.begin() + i);
+    }
+  }
+
+  // Return the number of currently contained SecMaps.
+  size_t CountSecMaps() { return mSecMaps.size(); }
+
+  // Reports the memory attributed to this PriMap: the object itself, the
+  // vector's backing store, and every contained SecMap.
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    size_t total = aMallocSizeOf(this);
+
+    // Passing the vector's data() pointer to the measuring function might
+    // not be safe with every std::vector implementation, but it seems to be
+    // working for now...
+    total += aMallocSizeOf(mSecMaps.data());
+
+    for (const mozilla::UniquePtr<SecMap>& secMap : mSecMaps) {
+      total += secMap->SizeOfIncludingThis(aMallocSizeOf);
+    }
+
+    return total;
+  }
+
+ private:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Returns the SecMap whose summary address range contains |ia|, or
+  // nullptr if there is none.
+  SecMap* FindSecMap(uintptr_t ia) {
+    // Binary search over mSecMaps.  The bounds are signed so that |upper|
+    // can drop to -1 without wrapping.
+    long int lower = 0;
+    long int upper = (long int)mSecMaps.size() - 1;
+
+    // Invariant: the unsearched space is lower .. upper, inclusive.
+    while (lower <= upper) {
+      const long int probe = lower + ((upper - lower) / 2);
+      mozilla::UniquePtr<SecMap>& candidate = mSecMaps[probe];
+      const uintptr_t firstAddr = candidate->mSummaryMinAddr;
+      const uintptr_t lastAddr = candidate->mSummaryMaxAddr;
+      if (ia < firstAddr) {
+        upper = probe - 1;
+      } else if (ia > lastAddr) {
+        lower = probe + 1;
+      } else {
+        MOZ_ASSERT(firstAddr <= ia && ia <= lastAddr);
+        return candidate.get();
+      }
+    }
+
+    // Search space exhausted: not found.
+    return nullptr;
+  }
+
+ private:
+  // sorted array of per-object ranges, non overlapping, non empty
+  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;
+
+  // a logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+////////////////////////////////////////////////////////////////
+// LUL //
+////////////////////////////////////////////////////////////////
+
+// Formats |_str| together with the current process id, the current thread id
+// and the address of the enclosing object into a 200-byte stack buffer, and
+// passes the result to mLog.  The expansion refers to |this| and |mLog|, so
+// the macro can only be used where both are in scope.
+#define LUL_LOG(_str) \
+ do { \
+ char buf[200]; \
+ SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
+ uint64_t(profiler_current_process_id().ToNumber()), \
+ uint64_t(profiler_current_thread_id().ToNumber()), this, \
+ (_str)); \
+ buf[sizeof(buf) - 1] = 0; \
+ mLog(buf); \
+ } while (0)
+
+// Creates a LUL object that logs through |aLog|.  The object starts out in
+// admin mode, records the constructing thread as the admin thread, and
+// allocates a fresh PriMap, SegArray and UniqueStringUniverse.
+LUL::LUL(void (*aLog)(const char*))
+ : mLog(aLog),
+ mAdminMode(true),
+ mAdminThreadId(profiler_current_thread_id()),
+ mPriMap(new PriMap(aLog)),
+ mSegArray(new SegArray()),
+ mUSU(new UniqueStringUniverse()) {
+ LUL_LOG("LUL::LUL: Created object");
+}
+
+// Logs the destruction (while mLog is still set), then deletes the PriMap,
+// SegArray and UniqueStringUniverse that the constructor allocated.
+LUL::~LUL() {
+ LUL_LOG("LUL::~LUL: Destroyed object");
+ delete mPriMap;
+ delete mSegArray;
+ mLog = nullptr;
+ delete mUSU;
+}
+
+// Logs the frame statistics accumulated since the last time they were
+// logged -- the total and the per-category (CTX, CFI, FP) counts -- but only
+// once the total has grown by at least 5000.  Logging also resets the
+// baseline against which the next delta is measured.
+void LUL::MaybeShowStats() {
+  // This is racy, in the sense that it can't guarantee that
+  //   n_new == n_new_Context + n_new_CFI + n_new_FP
+  // if it should happen that mStats is updated by some other thread
+  // in between computation of n_new and n_new_{Context,CFI,FP}.
+  // But it's just stats printing, so we don't really care.
+  const uint32_t n_new = mStats - mStatsPrevious;
+  if (n_new < 5000) {
+    return;
+  }
+
+  const uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
+  const uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
+  const uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
+  mStatsPrevious = mStats;
+
+  char line[200];
+  SprintfLiteral(line,
+                 "LUL frame stats: TOTAL %5u"
+                 "    CTX %4u    CFI %4u    FP %4u",
+                 n_new, n_new_Context, n_new_CFI, n_new_FP);
+  line[sizeof(line) - 1] = 0;
+  mLog(line);
+}
+
+// Reports the memory attributed to this LUL object: the object itself plus
+// its PriMap, as measured with |aMallocSizeOf|.
+size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  const size_t ownSize = aMallocSizeOf(this);
+  const size_t priMapSize = mPriMap->SizeOfIncludingThis(aMallocSizeOf);
+
+  // Measurement of the following members may be added later if DMD finds it
+  // is worthwhile:
+  // - mSegArray
+  // - mUSU
+
+  return ownSize + priMapSize;
+}
+
+// Leaves admin mode.  Must be called on the admin thread (release-asserted);
+// the Notify* and CountMappings methods release-assert admin mode, and
+// Unwind release-asserts its absence.
+void LUL::EnableUnwinding() {
+ LUL_LOG("LUL::EnableUnwinding");
+ // Don't assert for Admin mode here. That is, tolerate a call here
+ // if we are already in Unwinding mode.
+ MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+ mAdminMode = false;
+}
+
+// Tells LUL that an object has been mapped at [aRXavma, +aSize).  Unwind
+// data is read for it -- from |aMappedImage| if that is non-null, otherwise
+// from the file named |aFileName| -- into a new SecMap, which is prepared
+// and added to the primary map.  The range is also recorded as containing
+// code.  Only callable in admin mode, on the admin thread.  A zero |aSize|
+// is logged and otherwise ignored.
+void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+                         const void* aMappedImage) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  char buf[200];
+  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
+                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
+                 aFileName);
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Here's a new mapping, for this object.
+  mozilla::UniquePtr<SecMap> secMap = mozilla::MakeUnique<SecMap>(mLog);
+
+  // Read CFI or EXIDX unwind data into |secMap|.  The readers' results are
+  // deliberately ignored.
+  if (aMappedImage) {
+    (void)lul::ReadSymbolDataInternal(
+        (const uint8_t*)aMappedImage, string(aFileName),
+        std::vector<string>(), secMap.get(), (void*)aRXavma, aSize, mUSU, mLog);
+  } else {
+    (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
+                              secMap.get(), (void*)aRXavma, aSize, mUSU, mLog);
+  }
+
+  mLog("NotifyMap .. preparing entries\n");
+
+  secMap->PrepareRuleSets(aRXavma, aSize);
+
+  SprintfLiteral(buf, "NotifyMap got %lld entries\n",
+                 (long long int)secMap->Size());
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Add it to the primary map (the top level set of mapped objects).
+  mPriMap->AddSecMap(std::move(secMap));
+
+  // Tell the segment array about the mapping, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  const uintptr_t lastAvma = aRXavma + aSize - 1;
+  mSegArray->add(aRXavma, lastAvma, true);
+}
+
+// Tells LUL that [aRXavma, +aSize) contains code, without supplying any
+// unwind data for it: only the segment array is updated.  Only callable in
+// admin mode, on the admin thread.  A zero |aSize| is logged and otherwise
+// ignored.
+void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  char line[200];
+  SprintfLiteral(line, "NotifyExecutableArea %llx %llu\n",
+                 (unsigned long long int)aRXavma,
+                 (unsigned long long int)aSize);
+  line[sizeof(line) - 1] = 0;
+  mLog(line);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Tell the segment array about the mapping, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  const uintptr_t lastAvma = aRXavma + aSize - 1;
+  mSegArray->add(aRXavma, lastAvma, true);
+}
+
+// Tells LUL that the address range [aRXavmaMin, aRXavmaMax] (both ends
+// inclusive) is about to be unmapped.  Every SecMap intersecting the range is
+// dropped from the primary map, and the range is recorded in the segment
+// array as no longer containing code.  Only callable in admin mode, on the
+// admin thread.
+void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
+ MOZ_RELEASE_ASSERT(mAdminMode);
+ MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+ mLog(":\n");
+ char buf[100];
+ SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
+ (unsigned long long int)aRXavmaMin,
+ (unsigned long long int)aRXavmaMax);
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+
+ // Checked only after logging, so a bad range is visible in the log first.
+ MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
+
+ // Remove from the primary map, any secondary maps that intersect
+ // with the address range. Also delete the secondary maps.
+ mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
+
+ // Tell the segment array that the address range no longer
+ // contains valid code.
+ mSegArray->add(aRXavmaMin, aRXavmaMax, false);
+
+ SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
+ (int)mPriMap->CountSecMaps());
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+}
+
+// Returns the number of SecMaps currently held in the primary map.  Only
+// callable in admin mode, on the admin thread.
+size_t LUL::CountMappings() {
+ MOZ_RELEASE_ASSERT(mAdminMode);
+ MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+ return mPriMap->CountSecMaps();
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Reads one machine word from the stack image |aStackImg| at the address
+// held in |aAddr|.  Returns an invalid TaggedUWord if |aAddr| is itself
+// invalid, or if the word does not lie entirely within the image, i.e.
+// within [mStartAvma, mStartAvma + mLen).
+static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
+  if (!aAddr.Valid()) {
+    return TaggedUWord();
+  }
+
+  // Lower limit check.  |aAddr.Value()| is the lowest requested address
+  // and |aStackImg->mStartAvma| is the lowest address we actually have,
+  // so the comparison is straightforward.
+  if (aAddr.Value() < aStackImg->mStartAvma) {
+    return TaggedUWord();
+  }
+
+  // Upper limit check.  We must compute the highest requested address
+  // and the highest address we actually have, but being careful to
+  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
+  // 3/7 values below that, then we will get overflow.  See bug #1245477.
+  typedef CheckedInt<uintptr_t> CheckedUWord;
+  CheckedUWord highest_requested_plus_one =
+      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
+  CheckedUWord highest_available_plus_one =
+      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
+  if (!highest_requested_plus_one.isValid()     // overflow?
+      || !highest_available_plus_one.isValid()  // overflow?
+      || (highest_requested_plus_one.value() >
+          highest_available_plus_one.value())) {  // in range?
+    return TaggedUWord();
+  }
+
+  // Nothing above checks that the requested address is word-aligned, so
+  // copy the bytes out with memcpy rather than dereferencing a cast
+  // pointer.  memcpy does not allocate.
+  uintptr_t word;
+  memcpy(&word, &aStackImg->mContents[aAddr.Value() - aStackImg->mStartAvma],
+         sizeof(word));
+  return TaggedUWord(word);
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Returns the value that register number |aReg| has in |aOldRegs|, or |aCFA|
+// if |aReg| is DW_REG_CFA.  Only the registers of the architecture being
+// compiled for are handled; any other register number asserts in debug
+// builds and yields a default-constructed TaggedUWord.
+static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
+ TaggedUWord aCFA) {
+ switch (aReg) {
+ case DW_REG_CFA:
+ return aCFA;
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ case DW_REG_INTEL_XBP:
+ return aOldRegs->xbp;
+ case DW_REG_INTEL_XSP:
+ return aOldRegs->xsp;
+ case DW_REG_INTEL_XIP:
+ return aOldRegs->xip;
+#elif defined(GP_ARCH_arm)
+ case DW_REG_ARM_R7:
+ return aOldRegs->r7;
+ case DW_REG_ARM_R11:
+ return aOldRegs->r11;
+ case DW_REG_ARM_R12:
+ return aOldRegs->r12;
+ case DW_REG_ARM_R13:
+ return aOldRegs->r13;
+ case DW_REG_ARM_R14:
+ return aOldRegs->r14;
+ case DW_REG_ARM_R15:
+ return aOldRegs->r15;
+#elif defined(GP_ARCH_arm64)
+ case DW_REG_AARCH64_X29:
+ return aOldRegs->x29;
+ case DW_REG_AARCH64_X30:
+ return aOldRegs->x30;
+ case DW_REG_AARCH64_SP:
+ return aOldRegs->sp;
+#elif defined(GP_ARCH_mips64)
+ case DW_REG_MIPS_SP:
+ return aOldRegs->sp;
+ case DW_REG_MIPS_FP:
+ return aOldRegs->fp;
+ case DW_REG_MIPS_PC:
+ return aOldRegs->pc;
+#else
+# error "Unsupported arch"
+#endif
+ default:
+ MOZ_ASSERT(0);
+ return TaggedUWord();
+ }
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// See prototype for comment.
+//
+// Evaluates the prefix-expression instruction sequence in |aPfxInstrs| that
+// begins at index |start|, using a small fixed-size value stack.  The
+// sequence must begin with PX_Start and is terminated by PX_End.  Register
+// values come from |aOldRegs| and |aCFA|; memory reads go through
+// |aStackImg|.  Returns the value left on top of the stack, or a
+// default-constructed TaggedUWord on any failure: bad |start|, malformed
+// sequence, running off the end, stack overflow/underflow, or an empty
+// stack at PX_End.
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs) {
+  // A small evaluation stack, and a stack pointer, which points to
+  // the highest numbered in-use element.
+  const int N_STACK = 10;
+  TaggedUWord stack[N_STACK];
+  int stackPointer = -1;
+  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();
+
+#define PUSH(_tuw)                                             \
+  do {                                                         \
+    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
+    stack[++stackPointer] = (_tuw);                            \
+  } while (0)
+
+#define POP(_lval)                                   \
+  do {                                               \
+    if (stackPointer < 0) goto fail; /* underflow */ \
+    _lval = stack[stackPointer--];                   \
+  } while (0)
+
+  // Cursor in the instruction sequence.  It is declared here, ahead of the
+  // first |goto fail|, so that no jump crosses its initialisation, but it is
+  // only given its real value once |start| has been validated.
+  size_t curr = 0;
+
+  // Check the start point is sane.
+  size_t nInstrs = aPfxInstrs.size();
+  if (start < 0 || (size_t)start >= nInstrs) goto fail;
+
+  // |start| is known to be non-negative and below |nInstrs| here, so this
+  // cannot overflow.
+  curr = (size_t)start + 1;
+
+  {
+    // The instruction sequence must start with PX_Start.  If not,
+    // something is seriously wrong.
+    PfxInstr first = aPfxInstrs[start];
+    if (first.mOpcode != PX_Start) goto fail;
+
+    // Push the CFA on the stack to start with (or not), as required by
+    // the original DW_OP_*expression* CFI.
+    if (first.mOperand != 0) PUSH(aCFA);
+  }
+
+  while (true) {
+    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence
+
+    PfxInstr pfxi = aPfxInstrs[curr++];
+    if (pfxi.mOpcode == PX_End) break;  // we're done
+
+    // For the binary operators below, |x| is popped first, so it is the
+    // operand that was pushed last; the result is always "y op x".
+    switch (pfxi.mOpcode) {
+      case PX_Start:
+        // This should appear only at the start of the sequence.
+        goto fail;
+      case PX_End:
+        // We just took care of that, so we shouldn't see it again.
+        MOZ_ASSERT(0);
+        goto fail;
+      case PX_SImm32:
+        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
+        break;
+      case PX_DwReg: {
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
+        MOZ_ASSERT(reg != DW_REG_CFA);
+        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
+        break;
+      }
+      case PX_Deref: {
+        TaggedUWord addr;
+        POP(addr);
+        PUSH(DerefTUW(addr, aStackImg));
+        break;
+      }
+      case PX_Add: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y + x);
+        break;
+      }
+      case PX_Sub: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y - x);
+        break;
+      }
+      case PX_And: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y & x);
+        break;
+      }
+      case PX_Or: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y | x);
+        break;
+      }
+      case PX_CmpGES: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y.CmpGEs(x));
+        break;
+      }
+      case PX_Shl: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y << x);
+        break;
+      }
+      default:
+        MOZ_ASSERT(0);
+        goto fail;
+    }
+  }  // while (true)
+
+  // Evaluation finished.  The top value on the stack is the result.
+  if (stackPointer >= 0) {
+    return stack[stackPointer];
+  }
+  // Else fall through
+
+fail:
+  return TaggedUWord();
+
+#undef PUSH
+#undef POP
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Computes the value this expression denotes, given the register values in
+// |aOldRegs|, the CFA |aCFA|, the stack image |aStackImg| and (needed for
+// PFXEXPR only) the prefix instructions |aPfxInstrs|.  The result is a
+// default-constructed TaggedUWord when |mHow| is UNKNOWN or unrecognised,
+// or when a PFXEXPR is given no instructions.
+TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+                                const StackImage* aStackImg,
+                                const vector<PfxInstr>* aPfxInstrs) const {
+  switch (mHow) {
+    case UNKNOWN:
+      return TaggedUWord();
+    case NODEREF:
+    case DEREF: {
+      // Both forms start from register-plus-offset; DEREF then loads from
+      // that address, while NODEREF uses it as the value.
+      TaggedUWord regPlusOffset = EvaluateReg(mReg, aOldRegs, aCFA);
+      regPlusOffset = regPlusOffset + TaggedUWord((intptr_t)mOffset);
+      if (mHow == DEREF) {
+        return DerefTUW(regPlusOffset, aStackImg);
+      }
+      return regPlusOffset;
+    }
+    case PFXEXPR:
+      MOZ_ASSERT(aPfxInstrs);
+      if (aPfxInstrs) {
+        // |mOffset| is the index at which the expression starts.
+        return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg,
+                               *aPfxInstrs);
+      }
+      return TaggedUWord();
+    default:
+      MOZ_ASSERT(0);
+      return TaggedUWord();
+  }
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Applies the RuleSet |aRS| to the register set |*aRegs|, in place.  The
+// incoming values are copied aside and used as the "old" registers; every
+// tracked register in |*aRegs| is first reset to a default-constructed
+// TaggedUWord and then assigned whatever its expression in |aRS| evaluates
+// to.  |aStackImg| and |aPfxInstrs| are passed through to the expression
+// evaluator.
+static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
+ const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
+ // Take a copy of regs, since we'll need to refer to the old values
+ // whilst computing the new ones.
+ UnwindRegs old_regs = *aRegs;
+
+ // Mark all the current register values as invalid, so that the
+ // caller can see, on our return, which ones have been computed
+ // anew. If we don't even manage to compute a new PC value, then
+ // the caller will have to abandon the unwind.
+ // FIXME: Create and use instead: aRegs->SetAllInvalid();
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ aRegs->xbp = TaggedUWord();
+ aRegs->xsp = TaggedUWord();
+ aRegs->xip = TaggedUWord();
+#elif defined(GP_ARCH_arm)
+ aRegs->r7 = TaggedUWord();
+ aRegs->r11 = TaggedUWord();
+ aRegs->r12 = TaggedUWord();
+ aRegs->r13 = TaggedUWord();
+ aRegs->r14 = TaggedUWord();
+ aRegs->r15 = TaggedUWord();
+#elif defined(GP_ARCH_arm64)
+ aRegs->x29 = TaggedUWord();
+ aRegs->x30 = TaggedUWord();
+ aRegs->sp = TaggedUWord();
+ aRegs->pc = TaggedUWord();
+#elif defined(GP_ARCH_mips64)
+ aRegs->sp = TaggedUWord();
+ aRegs->fp = TaggedUWord();
+ aRegs->pc = TaggedUWord();
+#else
+# error "Unsupported arch"
+#endif
+
+ // This is generally useful.
+ const TaggedUWord inval = TaggedUWord();
+
+ // First, compute the CFA.
+ TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
+ aStackImg, aPfxInstrs);
+
+ // If we didn't manage to compute the CFA, well .. that's ungood,
+ // but keep going anyway. It'll be OK provided none of the register
+ // value rules mention the CFA. In any case, compute the new values
+ // for each register that we're tracking.
+
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ aRegs->xbp =
+ aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->xsp =
+ aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->xip =
+ aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_arm)
+ aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->r11 =
+ aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->r12 =
+ aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->r13 =
+ aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->r14 =
+ aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->r15 =
+ aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_arm64)
+ // Note: |pc| was reset above but has no expression of its own here.
+ aRegs->x29 =
+ aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->x30 =
+ aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_mips64)
+ aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+ aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+#else
+# error "Unsupported arch"
+#endif
+
+ // We're done. Any regs for which we didn't manage to compute a
+ // new value will now be marked as invalid.
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+//
+// Unwinds the stack described by |aStartRegs| and |aStackImg|, recording one
+// (PC, SP) pair per recovered frame, innermost frame first.
+//
+// aFramePCs, aFrameSPs: output arrays, each with room for |aFramesAvail|
+// entries. For frames other than the innermost, the recorded PC is the
+// return address minus one. A recorded SP of zero means the SP value
+// was not valid for that frame.
+// aFramesUsed: set to zero on entry, then incremented once per frame
+// written to the output arrays.
+// aFramePointerFramesAcquired: incremented once for each frame that was
+// recovered by frame-pointer chasing rather than via a RuleSet. It is
+// NOT reset by this function, so the caller must initialise it.
+// aFramesAvail: the maximum number of frames to record.
+// aStartRegs: the register values to start unwinding from. They are
+// copied; the caller's copy is not modified.
+// aStackImg: the stack image that memory reads are made from.
+//
+// Must not be called while this object is in admin mode (release-asserted).
+// For each frame, recovery is attempted with the RuleSet found for the
+// frame's address; if there is none, with the platform-specific fallbacks
+// below; and if those fail too, the unwind stops.
+void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
+ /*OUT*/ uintptr_t* aFrameSPs,
+ /*OUT*/ size_t* aFramesUsed,
+ /*OUT*/ size_t* aFramePointerFramesAcquired,
+ size_t aFramesAvail, UnwindRegs* aStartRegs,
+ StackImage* aStackImg) {
+ MOZ_RELEASE_ASSERT(!mAdminMode);
+
+ /////////////////////////////////////////////////////////
+ // BEGIN UNWIND
+
+ *aFramesUsed = 0;
+
+ UnwindRegs regs = *aStartRegs;
+ TaggedUWord last_valid_sp = TaggedUWord();
+
+ while (true) {
+ if (DEBUG_MAIN) {
+ char buf[300];
+ mLog("\n");
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ SprintfLiteral(
+ buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
+ (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
+ (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
+ (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+#elif defined(GP_ARCH_arm)
+ SprintfLiteral(
+ buf,
+ "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
+ " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
+ (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
+ (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
+ (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
+ (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
+ (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
+ (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+#elif defined(GP_ARCH_arm64)
+ SprintfLiteral(
+ buf,
+ "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx"
+ " sp %d/%llx\n",
+ (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+ (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
+ (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
+ (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+#elif defined(GP_ARCH_mips64)
+ SprintfLiteral(
+ buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n",
+ (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+ (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
+ (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+#else
+# error "Unsupported arch"
+#endif
+ }
+
+ // Pick out the instruction address and stack pointer for this frame.
+ // On arm and arm64 the innermost frame uses r15/pc, whereas all outer
+ // frames use r14/x30, since that is where the return address is held.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ TaggedUWord ia = regs.xip;
+ TaggedUWord sp = regs.xsp;
+#elif defined(GP_ARCH_arm)
+ TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
+ TaggedUWord sp = regs.r13;
+#elif defined(GP_ARCH_arm64)
+ TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
+ TaggedUWord sp = regs.sp;
+#elif defined(GP_ARCH_mips64)
+ TaggedUWord ia = regs.pc;
+ TaggedUWord sp = regs.sp;
+#else
+# error "Unsupported arch"
+#endif
+
+ // Stop if the output arrays are full.
+ if (*aFramesUsed >= aFramesAvail) {
+ break;
+ }
+
+ // If we don't have a valid value for the PC, give up.
+ if (!ia.Valid()) {
+ break;
+ }
+
+ // If this is the innermost frame, record the SP value, which
+ // presumably is valid. If this isn't the innermost frame, and we
+ // have a valid SP value, check that its SP value isn't less than
+ // the one we've seen so far, so as to catch potential SP value
+ // cycles.
+ if (*aFramesUsed == 0) {
+ last_valid_sp = sp;
+ } else {
+ MOZ_ASSERT(last_valid_sp.Valid());
+ if (sp.Valid()) {
+ if (sp.Value() < last_valid_sp.Value()) {
+ // Hmm, SP going in the wrong direction. Let's stop.
+ break;
+ }
+ // Remember where we got to.
+ last_valid_sp = sp;
+ }
+ }
+
+ // For the innermost frame, the IA value is what we need. For all
+ // other frames, it's actually the return address, so back up one
+ // byte so as to get it into the calling instruction.
+ aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
+ aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
+ (*aFramesUsed)++;
+
+ // Find the RuleSet for the current IA, if any. This will also
+ // query the backing (secondary) maps if it isn't found in the
+ // thread-local cache.
+
+ // If this isn't the innermost frame, back up into the calling insn.
+ // (|*aFramesUsed| has already been incremented above, hence "> 1".)
+ if (*aFramesUsed > 1) {
+ ia = ia + TaggedUWord((uintptr_t)(-1));
+ }
+
+ pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
+ mPriMap->Lookup(ia.Value());
+ const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
+ const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
+
+ if (DEBUG_MAIN) {
+ char buf[100];
+ SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
+ (unsigned long long int)ia.Value(), ruleset);
+ buf[sizeof(buf) - 1] = 0;
+ mLog(buf);
+ }
+
+#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
+ /////////////////////////////////////////////
+ ////
+ // On 32 bit x86-linux, syscalls are often done via the VDSO
+ // function __kernel_vsyscall, which doesn't have a corresponding
+ // object that we can read debuginfo from. That effectively kills
+ // off all stack traces for threads blocked in syscalls. Hence
+ // special-case by looking at the code surrounding the program
+ // counter.
+ //
+ // 0xf7757420 <__kernel_vsyscall+0>: push %ecx
+ // 0xf7757421 <__kernel_vsyscall+1>: push %edx
+ // 0xf7757422 <__kernel_vsyscall+2>: push %ebp
+ // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
+ // 0xf7757425 <__kernel_vsyscall+5>: sysenter
+ // 0xf7757427 <__kernel_vsyscall+7>: nop
+ // 0xf7757428 <__kernel_vsyscall+8>: nop
+ // 0xf7757429 <__kernel_vsyscall+9>: nop
+ // 0xf775742a <__kernel_vsyscall+10>: nop
+ // 0xf775742b <__kernel_vsyscall+11>: nop
+ // 0xf775742c <__kernel_vsyscall+12>: nop
+ // 0xf775742d <__kernel_vsyscall+13>: nop
+ // 0xf775742e <__kernel_vsyscall+14>: int $0x80
+ // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
+ // 0xf7757431 <__kernel_vsyscall+17>: pop %edx
+ // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
+ // 0xf7757433 <__kernel_vsyscall+19>: ret
+ //
+ // In cases where the sampled thread is blocked in a syscall, its
+ // program counter will point at "pop %ebp". Hence we look for
+ // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
+ // the corresponding register-recovery actions are:
+ // new_ebp = *(old_esp + 0)
+ // new eip = *(old_esp + 12)
+ // new_esp = old_esp + 16
+ //
+ // It may also be the case that the program counter points two
+ // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
+ // the case where the syscall has been restarted but the thread
+ // hasn't been rescheduled. The code below doesn't handle that;
+ // it could easily be made to.
+ //
+ if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
+ uintptr_t insns_min, insns_max;
+ uintptr_t eip = ia.Value();
+ bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
+ // Only inspect the code bytes if eip-2 .. eip+3 all lie within the
+ // bounding code segment.
+ if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
+ uint8_t* eipC = (uint8_t*)eip;
+ if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
+ eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
+ TaggedUWord sp_plus_0 = sp;
+ TaggedUWord sp_plus_12 = sp;
+ TaggedUWord sp_plus_16 = sp;
+ sp_plus_12 = sp_plus_12 + TaggedUWord(12);
+ sp_plus_16 = sp_plus_16 + TaggedUWord(16);
+ TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
+ TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
+ TaggedUWord new_esp = sp_plus_16;
+ if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
+ regs.xbp = new_ebp;
+ regs.xip = new_eip;
+ regs.xsp = new_esp;
+ continue;
+ }
+ }
+ }
+ }
+ ////
+ /////////////////////////////////////////////
+#endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
+
+ // So, do we have a ruleset for this address? If so, use it now.
+ if (ruleset) {
+ if (DEBUG_MAIN) {
+ ruleset->Print(mLog);
+ mLog("\n");
+ }
+ // Use the RuleSet to compute the registers for the previous
+ // frame. |regs| is modified in-place.
+ UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
+ continue;
+ }
+
+#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
+ defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
+ defined(GP_PLAT_amd64_freebsd)
+ // There's no RuleSet for the specified address. On amd64/x86_linux, see if
+ // it's possible to recover the caller's frame by using the frame pointer.
+
+ // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
+ // and assume the following layout:
+ //
+ // <--- new_SP
+ // +----------+
+ // | new_IP | (return address)
+ // +----------+
+ // | new_BP | <--- old_BP
+ // +----------+
+ // | .... |
+ // | .... |
+ // | .... |
+ // +----------+ <---- old_SP (arbitrary, but must be <= old_BP)
+
+ const size_t wordSzB = sizeof(uintptr_t);
+ TaggedUWord old_xsp = regs.xsp;
+
+ // points at new_BP ?
+ TaggedUWord old_xbp = regs.xbp;
+ // points at new_IP ?
+ TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
+ // is the new_SP ?
+ TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
+
+ if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
+ old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
+ // We don't need to do any range, alignment or validity checks for
+ // addresses passed to DerefTUW, since that performs them itself, and
+ // returns an invalid value on failure. Any such value will poison
+ // subsequent uses, and we do a final check for validity before putting
+ // the computed values into |regs|.
+ TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
+ // Require the frame pointer chain to move strictly upwards in memory.
+ if (new_xbp.Valid() && new_xbp.IsAligned() &&
+ old_xbp.Value() < new_xbp.Value()) {
+ TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
+ TaggedUWord new_xsp = old_xbp_plus2;
+ if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
+ regs.xbp = new_xbp;
+ regs.xip = new_xip;
+ regs.xsp = new_xsp;
+ (*aFramePointerFramesAcquired)++;
+ continue;
+ }
+ }
+ }
+#elif defined(GP_ARCH_arm64)
+ // Here is an example of generated code for prologue and epilogue..
+ //
+ // stp x29, x30, [sp, #-16]!
+ // mov x29, sp
+ // ...
+ // ldp x29, x30, [sp], #16
+ // ret
+ //
+ // Next is another example of generated code.
+ //
+ // stp x20, x19, [sp, #-32]!
+ // stp x29, x30, [sp, #16]
+ // add x29, sp, #0x10
+ // ...
+ // ldp x29, x30, [sp, #16]
+ // ldp x20, x19, [sp], #32
+ // ret
+ //
+ // Previous x29 and x30 register are stored in the address of x29 register.
+ // But since sp register value depends on local variables, we cannot compute
+ // previous sp register from current sp/fp/lr register and there is no
+ // regular rule for sp register in prologue. But since return address is lr
+ // register, if x29 is valid, we will get return address without sp
+ // register.
+ //
+ // So we assume the following layout that if no rule set. x29 is frame
+ // pointer, so we will be able to compute x29 and x30 .
+ //
+ // +----------+ <--- new_sp (cannot compute)
+ // | .... |
+ // +----------+
+ // | new_lr | (return address)
+ // +----------+
+ // | new_fp | <--- old_fp
+ // +----------+
+ // | .... |
+ // | .... |
+ // +----------+ <---- old_sp (arbitrary, but unused)
+
+ TaggedUWord old_fp = regs.x29;
+ // |regs.sp| may already be invalid here (see below), so the sanity check
+ // is made against the most recent valid SP instead.
+ if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
+ last_valid_sp.Value() <= old_fp.Value()) {
+ TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
+ if (new_fp.Valid() && new_fp.IsAligned() &&
+ old_fp.Value() < new_fp.Value()) {
+ TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
+ TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
+ if (new_lr.Valid()) {
+ regs.x29 = new_fp;
+ regs.x30 = new_lr;
+ // When using frame pointer to walk stack, we cannot compute sp
+ // register since we cannot compute sp register from fp/lr/sp
+ // register, and there is no regular rule to compute previous sp
+ // register. So mark as invalid.
+ regs.sp = TaggedUWord();
+ (*aFramePointerFramesAcquired)++;
+ continue;
+ }
+ }
+ }
+#endif // frame-pointer fallback: amd64/x86 on linux/android, amd64 on
+ // freebsd, or arm64
+
+ // We failed to recover a frame either using CFI or FP chasing, and we
+ // have no other ways to recover the frame. So we have to give up.
+ break;
+
+ } // top level unwind loop
+
+ // END UNWIND
+ /////////////////////////////////////////////////////////
+}
+
+////////////////////////////////////////////////////////////////
+// LUL Unit Testing //
+////////////////////////////////////////////////////////////////
+
+// The number of bytes of stack, counted from the innermost test frame, that
+// the unit tests hand to LUL. It is also the size of the padding array that
+// TestUnw places in its own frame.
+static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
+
+#if defined(GP_ARCH_mips64)
+// Returns the value that register $31 holds inside this function. Used by
+// GetAndCheckStackTrace to obtain a starting PC value on mips64.
+// NOTE(review): $31 is presumably the MIPS return-address register, in which
+// case -- since this function is never inlined -- the result is the address
+// in the caller just after the call. Confirm against the MIPS ABI.
+static __attribute__((noinline)) unsigned long __getpc(void) {
+ unsigned long rtaddr;
+ __asm__ volatile("move %0, $31" : "=r"(rtaddr));
+ return rtaddr;
+}
+#endif
+
+// This function is innermost in the test call sequence.  It uses LUL
+// to unwind, and compares the result with the sequence specified in
+// the director string.  These need to agree in order for the test to
+// pass.  In order not to screw up the results, this function needs
+// to have a not-very big stack frame, since we're only presenting
+// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
+// that chunk unavoidably includes the frame for this function.
+//
+// This function must not be inlined into its callers.  Doing so will
+// cause the expected-vs-actual backtrace consistency checking to
+// fail.
+//
+// aLUL:    the unwinder under test; results are logged via aLUL->mLog.
+// dstring: the director string, consisting of the characters '1' .. '8'.
+//          Any other character is treated as an inconsistency (and asserts
+//          in debug builds).
+//
+// Returns true if the test FAILED and false if it passed.
+static __attribute__((noinline)) bool GetAndCheckStackTrace(
+    LUL* aLUL, const char* dstring) {
+  // Get hold of the current unwind-start registers.  In every variant
+  // below, |start| must end up as the lowest stack address to snapshot,
+  // that is, the current stack pointer minus the size of any red zone.
+  UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+#if defined(GP_ARCH_amd64)
+  // block = { rip, rsp, rbp }
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "leaq 0(%%rip), %%r15"
+      "\n\t"
+      "movq %%r15, 0(%0)"
+      "\n\t"
+      "movq %%rsp, 8(%0)"
+      "\n\t"
+      "movq %%rbp, 16(%0)"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "r15");
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 128;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  // block = { eip, esp, ebp }
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 12);
+  __asm__ __volatile__(
+      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
+      "\n\t"
+      "popl %%edi"
+      "\n\t"
+      "movl %%edi, 0(%0)"
+      "\n\t"
+      "movl %%esp, 4(%0)"
+      "\n\t"
+      "movl %%ebp, 8(%0)"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "edi");
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  // block = { r15, r14, r13, r12, r11, r7 }
+  volatile uintptr_t block[6];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "mov r0, r15"
+      "\n\t"
+      "str r0, [%0, #0]"
+      "\n\t"
+      "str r14, [%0, #4]"
+      "\n\t"
+      "str r13, [%0, #8]"
+      "\n\t"
+      "str r12, [%0, #12]"
+      "\n\t"
+      "str r11, [%0, #16]"
+      "\n\t"
+      "str r7, [%0, #20]"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "r0");
+  startRegs.r15 = TaggedUWord(block[0]);
+  startRegs.r14 = TaggedUWord(block[1]);
+  startRegs.r13 = TaggedUWord(block[2]);
+  startRegs.r12 = TaggedUWord(block[3]);
+  startRegs.r11 = TaggedUWord(block[4]);
+  startRegs.r7 = TaggedUWord(block[5]);
+  const uintptr_t REDZONE_SIZE = 0;
+  // The stack pointer (r13) lives in block[2].  block[1] is r14, which holds
+  // a code address and so must not be used as the base of the stack image.
+  uintptr_t start = block[2] - REDZONE_SIZE;
+#elif defined(GP_ARCH_arm64)
+  // block = { pc, x29, x30, sp }
+  volatile uintptr_t block[4];
+  MOZ_ASSERT(sizeof(block) == 32);
+  __asm__ __volatile__(
+      "adr x0, . \n\t"
+      "str x0, [%0, #0] \n\t"
+      "str x29, [%0, #8] \n\t"
+      "str x30, [%0, #16] \n\t"
+      "mov x0, sp \n\t"
+      "str x0, [%0, #24] \n\t"
+      :
+      : "r"(&block[0])
+      : "memory", "x0");
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.x29 = TaggedUWord(block[1]);
+  startRegs.x30 = TaggedUWord(block[2]);
+  startRegs.sp = TaggedUWord(block[3]);
+  const uintptr_t REDZONE_SIZE = 0;
+  // The stack pointer lives in block[3].  block[1] is x29, and starting the
+  // image there would leave out the part of this frame between sp and x29.
+  uintptr_t start = block[3] - REDZONE_SIZE;
+#elif defined(GP_ARCH_mips64)
+  // block = { pc, $29, $30 }, which are used as pc, sp and fp respectively.
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "sd $29, 8(%0) \n"
+      "sd $30, 16(%0) \n"
+      :
+      : "r"(block)
+      : "memory");
+  block[0] = __getpc();
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.sp = TaggedUWord(block[1]);
+  startRegs.fp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#else
+#  error "Unsupported platform"
+#endif
+
+  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
+  // stack.  Both ends are rounded down to a word boundary, and the amount
+  // copied is limited to what a StackImage can hold.
+  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
+  uintptr_t ws = sizeof(void*);
+  start &= ~(ws - 1);
+  end &= ~(ws - 1);
+  uintptr_t nToCopy = end - start;
+  if (nToCopy > lul::N_STACK_BYTES) {
+    nToCopy = lul::N_STACK_BYTES;
+  }
+  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+  StackImage* stackImg = new StackImage();
+  stackImg->mLen = nToCopy;
+  stackImg->mStartAvma = start;
+  if (nToCopy > 0) {
+    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
+    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
+  }
+
+  // Unwind it.
+  const int MAX_TEST_FRAMES = 64;
+  uintptr_t framePCs[MAX_TEST_FRAMES];
+  uintptr_t frameSPs[MAX_TEST_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed = 0;
+  size_t framePointerFramesAcquired = 0;
+  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
+               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);
+
+  delete stackImg;
+
+  // if (0) {
+  //  // Show what we have.
+  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
+  //  for (size_t i = 0; i < framesUsed; i++) {
+  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
+  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
+  //  }
+  //  fprintf(stderr, "\n");
+  //}
+
+  // Check to see if there's a consistent binding between digits in
+  // the director string ('1' .. '8') and the PC values acquired by
+  // the unwind.  If there isn't, the unwinding has failed somehow.
+  uintptr_t binding[8];  // binding for '1' .. binding for '8'
+  memset((void*)binding, 0, sizeof(binding));
+
+  // The general plan is to work backwards along the director string
+  // and forwards along the framePCs array.  Doing so corresponds to
+  // working outwards from the innermost frame of the recursive test set.
+  const size_t dstringLen = strlen(dstring);
+
+  // Counts the number of consistent frames.
+  size_t nConsistent = 0;
+
+  // The starting points are a bit complex.  We can't use framePCs[0]
+  // because that contains the PC in this frame (above).  We can't use
+  // framePCs[1] because that will contain the PC at return point in the
+  // recursive test group (TestFn[1-8]) for their call "out" to this
+  // function, GetAndCheckStackTrace.  Although LUL will compute a correct
+  // return address, that will not be the same return address as for a
+  // recursive call out of the function to another function in the
+  // group.  Hence we can only start consistency checking at
+  // framePCs[2].
+  //
+  // To be consistent, then, we must ignore the last element in the
+  // director string as that corresponds to framePCs[1].  Hence the
+  // start points are: framePCs[2] and the last-but-one character of the
+  // director string.
+  //
+  // Also as a result of this, the number of consistent frames counted
+  // will always be one less than the length of the director string
+  // (not including its terminating zero).
+  //
+  // |nLeft| is the number of director characters still to be checked; the
+  // next one to look at is dstring[nLeft - 1].  Working with a count rather
+  // than a pointer means that no pointer to before the start of |dstring| is
+  // ever formed, even when the string has fewer than two characters.
+  size_t nLeft = dstringLen > 0 ? dstringLen - 1 : 0;
+  for (size_t frameIx = 2; nLeft > 0 && frameIx < framesUsed;
+       nLeft--, frameIx++) {
+    const char c = dstring[nLeft - 1];
+    const uintptr_t pc = framePCs[frameIx];
+    // If this doesn't hold, the director string is ill-formed.
+    MOZ_ASSERT(c >= '1' && c <= '8');
+    if (c < '1' || c > '8') {
+      // In builds where the assertion is compiled out, don't index outside
+      // |binding|.  Treat the bad character as an inconsistency instead.
+      break;
+    }
+    const size_t n = (size_t)(c - '1');
+    if (binding[n] == 0) {
+      // There's no binding for |c| yet, so install |pc| and carry on.
+      binding[n] = pc;
+    } else if (binding[n] != pc) {
+      // There's a pre-existing binding for |c|, and it isn't consistent.
+      // Give up now.
+      break;
+    }
+    // Consistent.  Keep going.
+    nConsistent++;
+  }
+
+  // So, did we succeed?
+  bool passed = nConsistent + 1 == dstringLen;
+
+  // Show the results.
+  char buf[200];
+  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
+  buf[sizeof(buf) - 1] = 0;
+  aLUL->mLog(buf);
+  SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",
+                 (int)nConsistent, (int)dstringLen, passed ? "PASS" : "FAIL");
+  buf[sizeof(buf) - 1] = 0;
+  aLUL->mLog(buf);
+
+  return !passed;
+}
+
+// Macro magic to create a set of 8 mutually recursive functions with
+// varying frame sizes. These will recurse amongst themselves as
+// specified by |strP|, the director string, and call
+// GetAndCheckStackTrace when the string becomes empty, passing it the
+// original value of the string. This checks the result, printing
+// results on |aLUL|'s logging sink, and also returns a boolean
+// indicating whether or not the results are acceptable (correct).
+//
+// Each generated function takes:
+// aLUL the unwinder under test, passed along unchanged
+// strPorig the complete director string, passed along unchanged
+// strP the part of the director string not yet consumed; one
+// character is consumed per call
+// A character outside '1' .. '8' in the director string is dispatched to
+// TestFn8. The value returned is the logical negation of what
+// GetAndCheckStackTrace returns, passed back unchanged through every level
+// of the recursion.
+
+// DECL_TEST_FN(NAME) declares the test function |NAME|.
+#define DECL_TEST_FN(NAME) \
+ bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
+
+// GEN_TEST_FN(NAME, FRAMESIZE) defines the test function |NAME|, whose frame
+// contains a |char| array of FRAMESIZE bytes.
+#define GEN_TEST_FN(NAME, FRAMESIZE) \
+ bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
+ /* Create a frame of size (at least) FRAMESIZE, so that the */ \
+ /* 8 functions created by this macro offer some variation in frame */ \
+ /* sizes. This isn't as simple as it might seem, since a clever */ \
+ /* optimizing compiler (eg, clang-5) detects that the array is unused */ \
+ /* and removes it. We try to defeat this by passing it to a function */ \
+ /* in a different compilation unit, and hoping that clang does not */ \
+ /* notice that the call is a no-op. */ \
+ char space[FRAMESIZE]; \
+ Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
+ \
+ if (*strP == '\0') { \
+ /* We've come to the end of the director string. */ \
+ /* Take a stack snapshot. */ \
+ /* We purposefully use a negation to avoid tail-call optimization */ \
+ return !GetAndCheckStackTrace(aLUL, strPorig); \
+ } else { \
+ /* Recurse onwards. This is a bit subtle. The obvious */ \
+ /* thing to do here is call onwards directly, from within the */ \
+ /* arms of the case statement. That gives a problem in that */ \
+ /* there will be multiple return points inside each function when */ \
+ /* unwinding, so it will be difficult to check for consistency */ \
+ /* against the director string. Instead, we make an indirect */ \
+ /* call, so as to guarantee that there is only one call site */ \
+ /* within each function. This does assume that the compiler */ \
+ /* won't transform it back to the simple direct-call form. */ \
+ /* To discourage it from doing so, the call is bracketed with */ \
+ /* __asm__ __volatile__ sections so as to make it not-movable. */ \
+ bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
+ switch (*strP) { \
+ case '1': \
+ nextFn = TestFn1; \
+ break; \
+ case '2': \
+ nextFn = TestFn2; \
+ break; \
+ case '3': \
+ nextFn = TestFn3; \
+ break; \
+ case '4': \
+ nextFn = TestFn4; \
+ break; \
+ case '5': \
+ nextFn = TestFn5; \
+ break; \
+ case '6': \
+ nextFn = TestFn6; \
+ break; \
+ case '7': \
+ nextFn = TestFn7; \
+ break; \
+ case '8': \
+ nextFn = TestFn8; \
+ break; \
+ default: \
+ nextFn = TestFn8; \
+ break; \
+ } \
+ /* "use" |space| immediately after the recursive call, */ \
+ /* so as to dissuade clang from deallocating the space while */ \
+ /* the call is active, or otherwise messing with the stack frame. */ \
+ __asm__ __volatile__("" ::: "cc", "memory"); \
+ bool passed = nextFn(aLUL, strPorig, strP + 1); \
+ Unused << write(1, space, 0); \
+ __asm__ __volatile__("" ::: "cc", "memory"); \
+ return passed; \
+ } \
+ }
+
+// The test functions are mutually recursive, so it is necessary to
+// declare them before defining them.
+DECL_TEST_FN(TestFn1)
+DECL_TEST_FN(TestFn2)
+DECL_TEST_FN(TestFn3)
+DECL_TEST_FN(TestFn4)
+DECL_TEST_FN(TestFn5)
+DECL_TEST_FN(TestFn6)
+DECL_TEST_FN(TestFn7)
+DECL_TEST_FN(TestFn8)
+
+// Define the eight test functions. The second argument is the size, in
+// bytes, of the array that GEN_TEST_FN places in each function's frame; the
+// sizes differ so that the frames vary in size.
+GEN_TEST_FN(TestFn1, 123)
+GEN_TEST_FN(TestFn2, 456)
+GEN_TEST_FN(TestFn3, 789)
+GEN_TEST_FN(TestFn4, 23)
+GEN_TEST_FN(TestFn5, 47)
+GEN_TEST_FN(TestFn6, 117)
+GEN_TEST_FN(TestFn7, 1)
+GEN_TEST_FN(TestFn8, 99)
+
+// Entry point for one unwind test.  Sets off the chain of TestFn calls
+// described by the director string |dstring|, beginning with TestFn1.  The
+// innermost call of that chain hands |dstring| to GetAndCheckStackTrace(),
+// which unwinds the stack, checks the result for consistency against
+// |dstring|, and writes a pass/fail message to aLUL's logging sink.
+// Afterwards *aNTests is incremented, and so is *aNTestsPassed if the test
+// passed.
+__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
+                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
+                                       const char* dstring) {
+  // Make sure the stack holds at least LUL_UNIT_TEST_STACK_SIZE bytes, so
+  // that sawing off that many bytes from the top and handing them to LUL
+  // cannot segfault.  A sufficiently clever compiler (clang, for one) can
+  // work out that an unused array may be deleted from the frame, which is
+  // why the array is volatile, is filled in before the call, and is
+  // (apparently) read back after it.
+  volatile char padding[LUL_UNIT_TEST_STACK_SIZE];
+  for (int ix = 0; ix < LUL_UNIT_TEST_STACK_SIZE; ix++) {
+    padding[ix] = (char)(ix & 0x7F);
+  }
+
+  // The test proper.
+  const bool testPassed = TestFn1(aLUL, dstring, dstring);
+
+  // Pretend to need the array contents: fold them into a checksum, then
+  // feed the checksum to an empty asm statement so that it counts as used.
+  int checksum = 0;
+  for (int ix = 0; ix < LUL_UNIT_TEST_STACK_SIZE; ix++) {
+    // If this doesn't fool LLVM, I don't know what will.
+    checksum += padding[ix] - 3 * ix;
+  }
+  __asm__ __volatile__("" : : "r"(checksum));
+
+  // Book-keeping.
+  *aNTests += 1;
+  if (testPassed) {
+    *aNTestsPassed += 1;
+  }
+}
+
+// Runs the full set of LUL unit tests, reporting progress on aLUL's logging
+// sink.  Both counters are zeroed first; on return *aNTests is the number of
+// tests run and *aNTestsPassed the number of those that passed.
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
+                     LUL* aLUL) {
+  // One director string per test, run in the order listed.
+  const char* const directorStrings[] = {
+      "11111111",
+      "11222211",
+      "111222333",
+      "1212121231212331212121212121212",
+      "31415827271828325332173258",
+      "123456781122334455667788777777777777777777777",
+  };
+
+  aLUL->mLog(":\n");
+  aLUL->mLog("LULUnitTest: BEGIN\n");
+
+  *aNTestsPassed = 0;
+  *aNTests = 0;
+  for (const char* director : directorStrings) {
+    TestUnw(aNTests, aNTestsPassed, aLUL, director);
+  }
+
+  aLUL->mLog("LULUnitTest: END\n");
+  aLUL->mLog(":\n");
+}
+
+} // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulMain.h b/mozglue/baseprofiler/lul/LulMain.h
new file mode 100644
index 0000000000..987a878c90
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMain.h
@@ -0,0 +1,378 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMain_h
+#define LulMain_h
+
+#include "PlatformMacros.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/BaseProfilerUtils.h"
+
+// LUL: A Lightweight Unwind Library.
+// This file provides the end-user (external) interface for LUL.
+
+// Some comments about naming in the implementation. These are safe
+// to ignore if you are merely using LUL, but are important if you
+// hack on its internals.
+//
+// Debuginfo readers in general have tended to use the word "address"
+// to mean several different things. This sometimes makes them
+// difficult to understand and maintain. LUL tries hard to avoid
+// using the word "address" and instead uses the following more
+// precise terms:
+//
+// * SVMA ("Stated Virtual Memory Address"): this is an address of a
+// symbol (etc) as it is stated in the symbol table, or other
+// metadata, of an object. Such values are typically small and
+// start from zero or thereabouts, unless the object has been
+// prelinked.
+//
+// * AVMA ("Actual Virtual Memory Address"): this is the address of a
+// symbol (etc) in a running process, that is, once the associated
+// object has been mapped into a process. Such values are typically
+// much larger than SVMAs, since objects can get mapped arbitrarily
+// far along the address space.
+//
+// * "Bias": the difference between AVMA and SVMA for a given symbol
+// (specifically, AVMA - SVMA). The bias is always an integral
+// number of pages. Once we know the bias for a given object's
+// text section (for example), we can compute the AVMAs of all of
+// its text symbols by adding the bias to their SVMAs.
+//
+// * "Image address": typically, to read debuginfo from an object we
+// will temporarily mmap in the file so as to read symbol tables
+// etc. Addresses in this temporary mapping are called "Image
+// addresses". Note that the temporary mapping is entirely
+// unrelated to the mappings of the file that the dynamic linker
+// must perform merely in order to get the program to run. Hence
+// image addresses are unrelated to either SVMAs or AVMAs.
+
+namespace lul {
+
+// A machine word plus validity tag.
+class TaggedUWord {
+ public:
+ // RUNS IN NO-MALLOC CONTEXT
+ // Construct a valid one.
+ explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {}
+
+ // RUNS IN NO-MALLOC CONTEXT
+ // Construct an invalid one.
+ TaggedUWord() : mValue(0), mValid(false) {}
+
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord operator+(TaggedUWord rhs) const {
+ return (Valid() && rhs.Valid()) ? TaggedUWord(Value() + rhs.Value())
+ : TaggedUWord();
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord operator-(TaggedUWord rhs) const {
+ return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value())
+ : TaggedUWord();
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord operator&(TaggedUWord rhs) const {
+ return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value())
+ : TaggedUWord();
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord operator|(TaggedUWord rhs) const {
+ return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value())
+ : TaggedUWord();
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord CmpGEs(TaggedUWord rhs) const {
+ if (Valid() && rhs.Valid()) {
+ intptr_t s1 = (intptr_t)Value();
+ intptr_t s2 = (intptr_t)rhs.Value();
+ return TaggedUWord(s1 >= s2 ? 1 : 0);
+ }
+ return TaggedUWord();
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord operator<<(TaggedUWord rhs) const {
+ if (Valid() && rhs.Valid()) {
+ uintptr_t shift = rhs.Value();
+ if (shift < 8 * sizeof(uintptr_t)) return TaggedUWord(Value() << shift);
+ }
+ return TaggedUWord();
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ // Is equal? Note: non-validity on either side gives non-equality.
+ bool operator==(TaggedUWord other) const {
+ return (mValid && other.Valid()) ? (mValue == other.Value()) : false;
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ // Is it word-aligned?
+ bool IsAligned() const {
+ return mValid && (mValue & (sizeof(uintptr_t) - 1)) == 0;
+ }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ uintptr_t Value() const { return mValue; }
+
+ // RUNS IN NO-MALLOC CONTEXT
+ bool Valid() const { return mValid; }
+
+ private:
+ uintptr_t mValue;
+ bool mValid;
+};
+
+// The registers, with validity tags, that will be unwound.
+
+struct UnwindRegs {
+#if defined(GP_ARCH_arm)
+ TaggedUWord r7;
+ TaggedUWord r11;
+ TaggedUWord r12;
+ TaggedUWord r13;
+ TaggedUWord r14;
+ TaggedUWord r15;
+#elif defined(GP_ARCH_arm64)
+ TaggedUWord x29;
+ TaggedUWord x30;
+ TaggedUWord sp;
+ TaggedUWord pc;
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ TaggedUWord xbp;
+ TaggedUWord xsp;
+ TaggedUWord xip;
+#elif defined(GP_ARCH_mips64)
+ TaggedUWord sp;
+ TaggedUWord fp;
+ TaggedUWord pc;
+#else
+# error "Unknown plat"
+#endif
+};
+
+// The maximum number of bytes in a stack snapshot. This value can be increased
+// if necessary, but testing showed that 160k is enough to obtain good
+// backtraces on x86_64 Linux. Most backtraces fit comfortably into 4-8k of
+// stack space, but we do have some very deep stacks occasionally. Please see
+// the comments in DoNativeBacktrace as to why it's OK to have this value be so
+// large.
+static const size_t N_STACK_BYTES = 160 * 1024;
+
+// The stack chunk image that will be unwound.
+struct StackImage {
+ // [start_avma, +len) specify the address range in the buffer.
+ // Obviously we require 0 <= len <= N_STACK_BYTES.
+ uintptr_t mStartAvma;
+ size_t mLen;
+ uint8_t mContents[N_STACK_BYTES];
+};
+
+// Statistics collection for the unwinder.
+template <typename T>
+class LULStats {
+ public:
+ LULStats() : mContext(0), mCFI(0), mFP(0) {}
+
+ template <typename S>
+ explicit LULStats(const LULStats<S>& aOther)
+ : mContext(aOther.mContext), mCFI(aOther.mCFI), mFP(aOther.mFP) {}
+
+ template <typename S>
+ LULStats<T>& operator=(const LULStats<S>& aOther) {
+ mContext = aOther.mContext;
+ mCFI = aOther.mCFI;
+ mFP = aOther.mFP;
+ return *this;
+ }
+
+ template <typename S>
+ uint32_t operator-(const LULStats<S>& aOther) {
+ return (mContext - aOther.mContext) + (mCFI - aOther.mCFI) +
+ (mFP - aOther.mFP);
+ }
+
+ T mContext; // Number of context frames
+ T mCFI; // Number of CFI/EXIDX frames
+ T mFP; // Number of frame-pointer recovered frames
+};
+
+// The core unwinder library class. Just one of these is needed, and
+// it can be shared by multiple unwinder threads.
+//
+// The library operates in one of two modes.
+//
+// * Admin mode. The library is in this state after creation. In Admin
+// mode, no unwinding may be performed. It is however allowable to
+// perform administrative tasks -- primarily, loading of unwind info
+// -- in this mode. In particular, it is safe for the library to
+// perform dynamic memory allocation in this mode. Safe in the
+// sense that there is no risk of deadlock against unwinding threads
+// that might -- because of where they have been sampled -- hold the
+// system's malloc lock.
+//
+// * Unwind mode. In this mode, calls to ::Unwind may be made, but
+// nothing else. ::Unwind guarantees not to make any dynamic memory
+// requests, so as to guarantee that the calling thread won't
+// deadlock in the case where it already holds the system's malloc lock.
+//
+// The library is created in Admin mode. After debuginfo is loaded,
+// the caller must switch it into Unwind mode by calling
+// ::EnableUnwinding. There is no way to switch it back to Admin mode
+// after that. To safely switch back to Admin mode would require the
+// caller (or other external agent) to guarantee that there are no
+// pending ::Unwind calls.
+
+class PriMap;
+class SegArray;
+class UniqueStringUniverse;
+
+class LUL {
+ public:
+ // Create; supply a logging sink. Sets the object in Admin mode.
+ explicit LUL(void (*aLog)(const char*));
+
+ // Destroy. Caller is responsible for ensuring that no other
+ // threads are in Unwind calls. All resources are freed and all
+ // registered unwinder threads are deregistered. Can be called
+ // either in Admin or Unwind mode.
+ ~LUL();
+
+ // Notify the library that unwinding is now allowed and so
+ // admin-mode calls are no longer allowed. The object is initially
+ // created in admin mode. The only possible transition is
+ // admin->unwinding, therefore.
+ void EnableUnwinding();
+
+ // Notify of a new r-x mapping, and load the associated unwind info.
+ // The filename is strdup'd and used for debug printing. If
+ // aMappedImage is NULL, this function will mmap/munmap the file
+ // itself, so as to be able to read the unwind info. If
+ // aMappedImage is non-NULL then it is assumed to point to a
+ // caller-supplied and caller-managed mapped image of the file.
+ // May only be called in Admin mode.
+ void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+ const void* aMappedImage);
+
+ // In rare cases we know an executable area exists but don't know
+ // what the associated file is. This call notifies LUL of such
+ // areas. This is important for correct functioning of stack
+ // scanning and of the x86-{linux,android} special-case
+ // __kernel_syscall function handling.
+ // This must be called only after the code area in
+ // question really has been mapped.
+ // May only be called in Admin mode.
+ void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
+
+ // Notify that a mapped area has been unmapped; discard any
+ // associated unwind info. Acquires mRWlock for writing. Note that
+ // to avoid segfaulting the stack-scan unwinder, which inspects code
+ // areas, this must be called before the code area in question is
+ // really unmapped. Note that, unlike NotifyAfterMap(), this
+ // function takes the start and end addresses of the range to be
+ // unmapped, rather than a start and a length parameter. This is so
+ // as to make it possible to notify an unmap for the entire address
+ // space using a single call.
+ // May only be called in Admin mode.
+ void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
+
+ // Apply NotifyBeforeUnmap to the entire address space. This causes
+ // LUL to discard all unwind and executable-area information for the
+ // entire address space.
+ // May only be called in Admin mode.
+ void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); }
+
+ // Returns the number of mappings currently registered.
+ // May only be called in Admin mode.
+ size_t CountMappings();
+
+ // Unwind |aStackImg| starting with the context in |aStartRegs|.
+ // Write the number of frames recovered in *aFramesUsed. Put
+ // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
+ // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
+ // |aFramesAvail| is the size of the two output arrays and hence the
+ // largest possible value of *aFramesUsed. PC values are always
+ // valid, and the unwind will stop when the PC becomes invalid, but
+ // the SP values might be invalid, in which case the value zero will
+ // be written in the relevant aFrameSPs[] slot.
+ //
+ // This function assumes that the SP values increase as it unwinds
+ // away from the innermost frame -- that is, that the stack grows
+ // down. It monitors SP values as it unwinds to check that they
+ // do increase, so as to avoid looping on corrupted stacks.
+ //
+ // May only be called in Unwind mode. Multiple threads may unwind
+ // at once. LUL user is responsible for ensuring that no thread makes
+ // any Admin calls whilst in Unwind mode.
+ // MOZ_CRASHes if the calling thread is not registered for unwinding.
+ //
+ // The calling thread must previously have been registered via a call to
+ // RegisterSampledThread.
+ void Unwind(/*OUT*/ uintptr_t* aFramePCs,
+ /*OUT*/ uintptr_t* aFrameSPs,
+ /*OUT*/ size_t* aFramesUsed,
+ /*OUT*/ size_t* aFramePointerFramesAcquired, size_t aFramesAvail,
+ UnwindRegs* aStartRegs, StackImage* aStackImg);
+
+ // The logging sink. Call to send debug strings to the caller-
+ // specified destination. Can only be called by the Admin thread.
+ void (*mLog)(const char*);
+
+ // Statistics relating to unwinding. These have to be atomic since
+ // unwinding can occur on different threads simultaneously.
+ LULStats<mozilla::Atomic<uint32_t>> mStats;
+
+ // Possibly show the statistics. This may not be called from any
+ // registered sampling thread, since it involves I/O.
+ void MaybeShowStats();
+
+ size_t SizeOfIncludingThis(mozilla::MallocSizeOf) const;
+
+ private:
+ // The statistics counters at the point where they were last printed.
+ LULStats<uint32_t> mStatsPrevious;
+
+ // Are we in admin mode? Initially |true| but changes to |false|
+ // once unwinding begins.
+ bool mAdminMode;
+
+ // The thread ID associated with admin mode. This is the only thread
+ // that is allowed to perform non-Unwind calls on this object. Conversely,
+ // no registered Unwinding thread may be the admin thread. This is so
+ // as to clearly partition the one thread that may do dynamic memory
+ // allocation from the threads that are being sampled, since the latter
+ // absolutely may not do dynamic memory allocation.
+ mozilla::baseprofiler::BaseProfilerThreadId mAdminThreadId;
+
+ // The top level mapping from code address ranges to postprocessed
+ // unwind info. Basically a sorted array of (addr, len, info)
+ // records. This field is updated by NotifyAfterMap and NotifyBeforeUnmap.
+ PriMap* mPriMap;
+
+ // An auxiliary structure that records which address ranges are
+ // mapped r-x, for the benefit of the stack scanner.
+ SegArray* mSegArray;
+
+ // A UniqueStringUniverse that holds all the strdup'd strings created
+ // whilst reading unwind information. This is included so as to make
+ // it possible to free them in ~LUL.
+ UniqueStringUniverse* mUSU;
+};
+
+// Run unit tests on an initialised, loaded-up LUL instance, and print
+// summary results on |aLUL|'s logging sink. Also return the number
+// of tests run in *aNTests and the number that passed in
+// *aNTestsPassed.
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
+ LUL* aLUL);
+
+} // namespace lul
+
+#endif // LulMain_h
diff --git a/mozglue/baseprofiler/lul/LulMainInt.h b/mozglue/baseprofiler/lul/LulMainInt.h
new file mode 100644
index 0000000000..c2ee45d73d
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMainInt.h
@@ -0,0 +1,420 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMainInt_h
+#define LulMainInt_h
+
+#include "PlatformMacros.h"
+#include "LulMain.h" // for TaggedUWord
+
+#include "mozilla/Assertions.h"
+
+#include <string>
+#include <vector>
+
+// This file provides the internal interface inside LUL. If you are an
+// end-user of LUL, do not include it in your code. The end-user
+// interface is in LulMain.h.
+
+namespace lul {
+
+using std::vector;
+
+////////////////////////////////////////////////////////////////
+// DW_REG_ constants //
+////////////////////////////////////////////////////////////////
+
+// These are the Dwarf CFI register numbers, as (presumably) defined
+// in the ELF ABI supplements for each architecture.
+
+enum DW_REG_NUMBER {
+ // No real register has this number. It's convenient to be able to
+ // treat the CFA (Canonical Frame Address) as "just another
+ // register", though.
+ DW_REG_CFA = -1,
+#if defined(GP_ARCH_arm)
+ // ARM registers
+ DW_REG_ARM_R7 = 7,
+ DW_REG_ARM_R11 = 11,
+ DW_REG_ARM_R12 = 12,
+ DW_REG_ARM_R13 = 13,
+ DW_REG_ARM_R14 = 14,
+ DW_REG_ARM_R15 = 15,
+#elif defined(GP_ARCH_arm64)
+ // aarch64 registers
+ DW_REG_AARCH64_X29 = 29,
+ DW_REG_AARCH64_X30 = 30,
+ DW_REG_AARCH64_SP = 31,
+#elif defined(GP_ARCH_amd64)
+ // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are
+ // combined, a merged set of register constants is needed.
+ DW_REG_INTEL_XBP = 6,
+ DW_REG_INTEL_XSP = 7,
+ DW_REG_INTEL_XIP = 16,
+#elif defined(GP_ARCH_x86)
+ DW_REG_INTEL_XBP = 5,
+ DW_REG_INTEL_XSP = 4,
+ DW_REG_INTEL_XIP = 8,
+#elif defined(GP_ARCH_mips64)
+ DW_REG_MIPS_SP = 29,
+ DW_REG_MIPS_FP = 30,
+ DW_REG_MIPS_PC = 34,
+#else
+# error "Unknown arch"
+#endif
+};
+
+////////////////////////////////////////////////////////////////
+// PfxExpr //
+////////////////////////////////////////////////////////////////
+
+enum PfxExprOp {
+ // meaning of mOperand effect on stack
+ PX_Start, // bool start-with-CFA? start, with CFA on stack, or not
+ PX_End, // none stop; result is at top of stack
+ PX_SImm32, // int32 push signed int32
+ PX_DwReg, // DW_REG_NUMBER push value of the specified reg
+ PX_Deref, // none pop X ; push *X
+ PX_Add, // none pop X ; pop Y ; push Y + X
+ PX_Sub, // none pop X ; pop Y ; push Y - X
+ PX_And, // none pop X ; pop Y ; push Y & X
+ PX_Or, // none pop X ; pop Y ; push Y | X
+ PX_CmpGES, // none pop X ; pop Y ; push (Y >=s X) ? 1 : 0
+ PX_Shl // none pop X ; pop Y ; push Y << X
+};
+
+struct PfxInstr {
+ PfxInstr(PfxExprOp opcode, int32_t operand)
+ : mOpcode(opcode), mOperand(operand) {}
+ explicit PfxInstr(PfxExprOp opcode) : mOpcode(opcode), mOperand(0) {}
+ bool operator==(const PfxInstr& other) const {
+ return mOpcode == other.mOpcode && mOperand == other.mOperand;
+ }
+ PfxExprOp mOpcode;
+ int32_t mOperand;
+};
+
+static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly");
+
+// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start].
+// In the case of any mishap (stack over/underflow, running off the end of
+// the instruction vector, obviously malformed sequences),
+// return an invalid TaggedUWord.
+// RUNS IN NO-MALLOC CONTEXT
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
+ TaggedUWord aCFA, const StackImage* aStackImg,
+ const vector<PfxInstr>& aPfxInstrs);
+
+////////////////////////////////////////////////////////////////
+// LExpr //
+////////////////////////////////////////////////////////////////
+
+// An expression -- very primitive. Denotes either "register +
+// offset", a dereferenced version of the same, or a reference to a
+// prefix expression stored elsewhere. So as to allow convenient
+// handling of Dwarf-derived unwind info, the register may also denote
+// the CFA. A large number of these need to be stored, so we ensure
+// it fits into 8 bytes. See comment below on RuleSet to see how
+// expressions fit into the bigger picture.
+
+enum LExprHow {
+ UNKNOWN = 0, // This LExpr denotes no value.
+ NODEREF, // Value is (mReg + mOffset).
+ DEREF, // Value is *(mReg + mOffset).
+ PFXEXPR // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset])
+};
+
+inline static const char* NameOf_LExprHow(LExprHow how) {
+ switch (how) {
+ case UNKNOWN:
+ return "UNKNOWN";
+ case NODEREF:
+ return "NODEREF";
+ case DEREF:
+ return "DEREF";
+ case PFXEXPR:
+ return "PFXEXPR";
+ default:
+ return "LExpr-??";
+ }
+}
+
+struct LExpr {
+ // Denotes an expression with no value.
+ LExpr() : mHow(UNKNOWN), mReg(0), mOffset(0) {}
+
+ // Denotes any expressible expression.
+ LExpr(LExprHow how, int16_t reg, int32_t offset)
+ : mHow(how), mReg(reg), mOffset(offset) {
+ switch (how) {
+ case UNKNOWN:
+ MOZ_ASSERT(reg == 0 && offset == 0);
+ break;
+ case NODEREF:
+ break;
+ case DEREF:
+ break;
+ case PFXEXPR:
+ MOZ_ASSERT(reg == 0 && offset >= 0);
+ break;
+ default:
+ MOZ_ASSERT(0, "LExpr::LExpr: invalid how");
+ }
+ }
+
+ // Change the offset for an expression that references memory.
+ LExpr add_delta(long delta) {
+ MOZ_ASSERT(mHow == NODEREF);
+ // If this is a non-debug build and the above assertion would have
+ // failed, at least return LExpr() so that the machinery that uses
+ // the resulting expression fails in a repeatable way.
+ return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset + delta)
+ : LExpr(); // Gone bad
+ }
+
+ // Dereference an expression that denotes a memory address.
+ LExpr deref() {
+ MOZ_ASSERT(mHow == NODEREF);
+ // Same rationale as for add_delta().
+ return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset)
+ : LExpr(); // Gone bad
+ }
+
+ // Print a rule for recovery of |aNewReg| whose recovered value
+ // is this LExpr.
+ std::string ShowRule(const char* aNewReg) const;
+
+ // Evaluate this expression, producing a TaggedUWord. |aOldRegs|
+ // holds register values that may be referred to by the expression.
+ // |aCFA| holds the CFA value, if any, that applies. |aStackImg|
+ // contains a chunk of stack that will be consulted if the expression
+ // references memory. |aPfxInstrs| holds the vector of PfxInstrs
+ // that will be consulted if this is a PFXEXPR.
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+ const StackImage* aStackImg,
+ const vector<PfxInstr>* aPfxInstrs) const;
+
+ // Representation of expressions. If |mReg| is DW_REG_CFA (-1) then
+ // it denotes the CFA. All other allowed values for |mReg| are
+ // nonnegative and are DW_REG_ values.
+ LExprHow mHow : 8;
+ int16_t mReg; // A DW_REG_ value
+ int32_t mOffset; // 32-bit signed offset should be more than enough.
+};
+
+static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly");
+
+////////////////////////////////////////////////////////////////
+// RuleSet //
+////////////////////////////////////////////////////////////////
+
+// This is platform-dependent. For some address range, describes how
+// to recover the CFA and then how to recover the registers for the
+// previous frame.
+//
+// The set of LExprs contained in a given RuleSet describe a DAG which
+// says how to compute the caller's registers ("new registers") from
+// the callee's registers ("old registers"). The DAG can contain a
+// single internal node, which is the value of the CFA for the callee.
+// It would be possible to construct a DAG that omits the CFA, but
+// including it makes the summarisers simpler, and the Dwarf CFI spec
+// has the CFA as a central concept.
+//
+// For this to make sense, |mCfaExpr| can't have
+// |mReg| == DW_REG_CFA since we have no previous value for the CFA.
+// All of the other |Expr| fields can -- and usually do -- specify
+// |mReg| == DW_REG_CFA.
+//
+// With that in place, the unwind algorithm proceeds as follows.
+//
+// (0) Initially: we have values for the old registers, and a memory
+// image.
+//
+// (1) Compute the CFA by evaluating |mCfaExpr|. Add the computed
+// value to the set of "old registers".
+//
+// (2) Compute values for the registers by evaluating all of the other
+// |Expr| fields in the RuleSet. These can depend on both the old
+// register values and the just-computed CFA.
+//
+// If we are unwinding without computing a CFA, perhaps because the
+// RuleSets are derived from EXIDX instead of Dwarf, then
+// |mCfaExpr.mHow| will be UNKNOWN, so the computed value will
+// be invalid -- that is, TaggedUWord() -- and so any attempt to use
+// that will result in the same value. But that's OK because the
+// RuleSet would make no sense if it depended on the CFA but specified no
+// way to compute it.
+//
+// A RuleSet is not allowed to cover zero address range. Having zero
+// length would break binary searching in SecMaps and PriMaps.
+
+class RuleSet {
+ public:
+ RuleSet();
+ void Print(void (*aLog)(const char*)) const;
+
+ // Find the LExpr* for a given DW_REG_ value in this class.
+ LExpr* ExprForRegno(DW_REG_NUMBER aRegno);
+
+ uintptr_t mAddr;
+ uintptr_t mLen;
+ // How to compute the CFA.
+ LExpr mCfaExpr;
+ // How to compute caller register values. These may reference the
+ // value defined by |mCfaExpr|.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ LExpr mXipExpr; // return address
+ LExpr mXspExpr;
+ LExpr mXbpExpr;
+#elif defined(GP_ARCH_arm)
+ LExpr mR15expr; // return address
+ LExpr mR14expr;
+ LExpr mR13expr;
+ LExpr mR12expr;
+ LExpr mR11expr;
+ LExpr mR7expr;
+#elif defined(GP_ARCH_arm64)
+ LExpr mX29expr; // frame pointer register
+ LExpr mX30expr; // link register
+ LExpr mSPexpr;
+#elif defined(GP_ARCH_mips64)
+ LExpr mPCexpr;
+ LExpr mFPexpr;
+ LExpr mSPexpr;
+#else
+# error "Unknown arch"
+#endif
+};
+
+// Returns |true| for Dwarf register numbers which are members
+// of the set of registers that LUL unwinds on this target.
+static inline bool registerIsTracked(DW_REG_NUMBER reg) {
+ switch (reg) {
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+ case DW_REG_INTEL_XBP:
+ case DW_REG_INTEL_XSP:
+ case DW_REG_INTEL_XIP:
+ return true;
+#elif defined(GP_ARCH_arm)
+ case DW_REG_ARM_R7:
+ case DW_REG_ARM_R11:
+ case DW_REG_ARM_R12:
+ case DW_REG_ARM_R13:
+ case DW_REG_ARM_R14:
+ case DW_REG_ARM_R15:
+ return true;
+#elif defined(GP_ARCH_arm64)
+ case DW_REG_AARCH64_X29:
+ case DW_REG_AARCH64_X30:
+ case DW_REG_AARCH64_SP:
+ return true;
+#elif defined(GP_ARCH_mips64)
+ case DW_REG_MIPS_FP:
+ case DW_REG_MIPS_SP:
+ case DW_REG_MIPS_PC:
+ return true;
+#else
+# error "Unknown arch"
+#endif
+ default:
+ return false;
+ }
+}
+
+////////////////////////////////////////////////////////////////
+// SecMap //
+////////////////////////////////////////////////////////////////
+
+// A SecMap may have zero address range, temporarily, whilst RuleSets
+// are being added to it. But adding a zero-range SecMap to a PriMap
+// will make it impossible to maintain the total order of the PriMap
+// entries, and so that can't be allowed to happen.
+
+class SecMap {
+ public:
+ // These summarise the contained mRuleSets, in that they give
+ // exactly the lowest and highest addresses that any of the entries
+ // in this SecMap cover. Hence invariants:
+ //
+ // mRuleSets is nonempty
+ // <=> mSummaryMinAddr <= mSummaryMaxAddr
+ // && mSummaryMinAddr == mRuleSets[0].mAddr
+ // && mSummaryMaxAddr == mRuleSets[#rulesets-1].mAddr
+ // + mRuleSets[#rulesets-1].mLen - 1;
+ //
+ // This requires that no RuleSet has zero length.
+ //
+ // mRuleSets is empty
+ // <=> mSummaryMinAddr > mSummaryMaxAddr
+ //
+ // This doesn't constrain mSummaryMinAddr and mSummaryMaxAddr uniquely,
+ // so let's use mSummaryMinAddr == 1 and mSummaryMaxAddr == 0 to denote
+ // this case.
+
+ explicit SecMap(void (*aLog)(const char*));
+ ~SecMap();
+
+ // Binary search mRuleSets to find one that brackets |ia|, or nullptr
+ // if none is found. It's not allowable to do this until PrepareRuleSets
+ // has been called first.
+ RuleSet* FindRuleSet(uintptr_t ia);
+
+ // Add a RuleSet to the collection. The rule is copied in. Calling
+ // this makes the map non-searchable.
+ void AddRuleSet(const RuleSet* rs);
+
+ // Add a PfxInstr to the vector of such instrs, and return the index
+ // in the vector. Calling this makes the map non-searchable.
+ uint32_t AddPfxInstr(PfxInstr pfxi);
+
+ // Returns the entire vector of PfxInstrs.
+ const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; }
+
+ // Prepare the map for searching. Also, remove any rules for code
+ // address ranges which don't fall inside [start, +len). |len| may
+ // not be zero.
+ void PrepareRuleSets(uintptr_t start, size_t len);
+
+ bool IsEmpty();
+
+ size_t Size() { return mRuleSets.size(); }
+
+ size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ // The min and max addresses of the addresses in the contained
+ // RuleSets. See comment above for invariants.
+ uintptr_t mSummaryMinAddr;
+ uintptr_t mSummaryMaxAddr;
+
+ private:
+ // False whilst adding entries; true once it is safe to call FindRuleSet.
+ // Transition (false->true) is caused by calling PrepareRuleSets().
+ bool mUsable;
+
+ // A vector of RuleSets, sorted, nonoverlapping (post PrepareRuleSets()).
+ vector<RuleSet> mRuleSets;
+
+ // A vector of PfxInstrs, which are referred to by the RuleSets.
+ // These are provided as a representation of Dwarf expressions
+ // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression),
+ // are relatively expensive to evaluate, and are therefore
+ // expected to be used only occasionally.
+ //
+ // The vector holds a bunch of separate PfxInstr programs, each one
+ // starting with a PX_Start and terminated by a PX_End, all
+ // concatenated together. When a RuleSet can't recover a value
+ // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is
+ // the index in this vector of start of the necessary PfxInstr program.
+ vector<PfxInstr> mPfxInstrs;
+
+ // A logging sink, for debugging.
+ void (*mLog)(const char*);
+};
+
+} // namespace lul
+
+#endif // ndef LulMainInt_h
diff --git a/mozglue/baseprofiler/lul/platform-linux-lul.cpp b/mozglue/baseprofiler/lul/platform-linux-lul.cpp
new file mode 100644
index 0000000000..a9ee65858d
--- /dev/null
+++ b/mozglue/baseprofiler/lul/platform-linux-lul.cpp
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "AutoObjectMapper.h"
+#include "BaseProfiler.h"
+#include "BaseProfilerSharedLibraries.h"
+#include "platform.h"
+#include "PlatformMacros.h"
+#include "LulMain.h"
+
+// Contains miscellaneous helpers that are used to connect the Gecko Profiler
+// and LUL.
+
+// Find out, in a platform-dependent way, where the code modules got
+// mapped in the process' virtual address space, and get |aLUL| to
+// load unwind info for them.
+void read_procmaps(lul::LUL* aLUL) {
+ MOZ_ASSERT(aLUL->CountMappings() == 0);
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+ SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
+
+ for (size_t i = 0; i < info.GetSize(); i++) {
+ const SharedLibrary& lib = info.GetEntry(i);
+
+ std::string nativePath = lib.GetDebugPath();
+
+ // We can use the standard POSIX-based mapper.
+ AutoObjectMapperPOSIX mapper(aLUL->mLog);
+
+ // Ask |mapper| to map the object. Then hand its mapped address
+ // to NotifyAfterMap().
+ void* image = nullptr;
+ size_t size = 0;
+ bool ok = mapper.Map(&image, &size, nativePath);
+ if (ok && image && size > 0) {
+ aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd() - lib.GetStart(),
+ nativePath.c_str(), image);
+ } else if (!ok && lib.GetDebugName().empty()) {
+ // The object has no name and (as a consequence) the mapper failed to map
+ // it. This happens on Linux, where GetInfoForSelf() produces such a
+ // mapping for the VDSO. This is a problem on x86-{linux,android} because
+ // lack of knowledge about the mapped area inhibits LUL's special
+ // __kernel_syscall handling. Hence notify |aLUL| at least of the
+ // mapping, even though it can't read any unwind information for the area.
+ aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd() - lib.GetStart());
+ }
+
+ // |mapper| goes out of scope at this point and so its destructor
+ // unmaps the object.
+ }
+
+#else
+# error "Unknown platform"
+#endif
+}
+
+// LUL needs a callback for its logging sink.
+void logging_sink_for_LUL(const char* str) {
+ // These are only printed when Verbose logging is enabled (e.g. with
+ // MOZ_BASE_PROFILER_VERBOSE_LOGGING=1). This is because LUL's logging is much
+ // more verbose than the rest of the profiler's logging, which occurs at the
+ // Info (3) and Debug (4) levels.
+ // FIXME: This causes a build failure in memory/replace/dmd/test/SmokeDMD (!)
+ // and other places, because it doesn't link the implementation in
+ // platform.cpp.
+ // VERBOSE_LOG("[%d] %s", profiler_current_process_id(), str);
+}
diff --git a/mozglue/baseprofiler/lul/platform-linux-lul.h b/mozglue/baseprofiler/lul/platform-linux-lul.h
new file mode 100644
index 0000000000..b54e80edcf
--- /dev/null
+++ b/mozglue/baseprofiler/lul/platform-linux-lul.h
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PLATFORM_LINUX_LUL_H
+#define MOZ_PLATFORM_LINUX_LUL_H
+
+#include "platform.h"
+
+#include "BaseProfiler.h"
+
+// Find out, in a platform-dependent way, where the code modules got
+// mapped in the process' virtual address space, and get |aLUL| to
+// load unwind info for them.
+void read_procmaps(lul::LUL* aLUL);
+
+// LUL needs a callback for its logging sink.
+void logging_sink_for_LUL(const char* str);
+
+#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */
diff --git a/mozglue/baseprofiler/moz.build b/mozglue/baseprofiler/moz.build
new file mode 100644
index 0000000000..2c75814e5a
--- /dev/null
+++ b/mozglue/baseprofiler/moz.build
@@ -0,0 +1,133 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This is pretty much a copy from tools/profiler, cut down to exclude anything
+# that cannot work in mozglue (because they are totally dependent on libxul-
+# specific code).
+# All exported headers now prefixed with "Base" to avoid #include name clashes.
+
+if CONFIG["MOZ_GECKO_PROFILER"]:
+ DEFINES["IMPL_MFBT"] = True
+ EXPORTS += [
+ "public/BaseProfilingStack.h",
+ ]
+ UNIFIED_SOURCES += [
+ "core/PageInformation.cpp",
+ "core/platform.cpp",
+ "core/ProfileBuffer.cpp",
+ "core/ProfileBufferEntry.cpp",
+ "core/ProfiledThreadData.cpp",
+ "core/ProfileJSONWriter.cpp",
+ "core/ProfilerBacktrace.cpp",
+ "core/ProfilerMarkers.cpp",
+ "core/ProfilingCategory.cpp",
+ "core/ProfilingStack.cpp",
+ "core/RegisteredThread.cpp",
+ ]
+
+ if CONFIG["OS_TARGET"] in ("Android", "Linux", "FreeBSD"):
+ if CONFIG["TARGET_CPU"] in ("arm", "aarch64", "x86", "x86_64", "mips64"):
+ UNIFIED_SOURCES += [
+ "lul/AutoObjectMapper.cpp",
+ "lul/LulCommon.cpp",
+ "lul/LulDwarf.cpp",
+ "lul/LulDwarfSummariser.cpp",
+ "lul/LulElf.cpp",
+ "lul/LulMain.cpp",
+ "lul/platform-linux-lul.cpp",
+ ]
+ # These files cannot be built in unified mode because of name clashes with mozglue headers on Android.
+ SOURCES += [
+ "core/shared-libraries-linux.cc",
+ ]
+ if CONFIG["TARGET_CPU"] == "arm" and CONFIG["OS_TARGET"] != "FreeBSD":
+ SOURCES += [
+ "core/EHABIStackWalk.cpp",
+ ]
+ elif CONFIG["OS_TARGET"] == "Darwin":
+ UNIFIED_SOURCES += [
+ "core/shared-libraries-macos.cc",
+ ]
+ elif CONFIG["OS_TARGET"] == "WINNT":
+ SOURCES += [
+ "core/shared-libraries-win32.cc",
+ ]
+
+ LOCAL_INCLUDES += [
+ "/mozglue/baseprofiler/core/",
+ "/mozglue/linker",
+ ]
+
+ if CONFIG["OS_TARGET"] == "Android":
+ DEFINES["ANDROID_NDK_MAJOR_VERSION"] = CONFIG["ANDROID_NDK_MAJOR_VERSION"]
+ DEFINES["ANDROID_NDK_MINOR_VERSION"] = CONFIG["ANDROID_NDK_MINOR_VERSION"]
+
+GeneratedFile(
+ "public/ProfilingCategoryList.h",
+ script="build/generate_profiling_categories.py",
+ entry_point="generate_macro_header",
+ inputs=["build/profiling_categories.yaml"],
+)
+
+EXPORTS += [
+ "!public/ProfilingCategoryList.h",
+ "public/BaseProfiler.h",
+ "public/BaseProfilerSharedLibraries.h",
+ "public/BaseProfilingCategory.h",
+]
+
+EXPORTS.mozilla += [
+ "public/BaseAndGeckoProfilerDetail.h",
+ "public/BaseProfileJSONWriter.h",
+ "public/BaseProfilerCounts.h",
+ "public/BaseProfilerDetail.h",
+ "public/BaseProfilerLabels.h",
+ "public/BaseProfilerMarkers.h",
+ "public/BaseProfilerMarkersDetail.h",
+ "public/BaseProfilerMarkersPrerequisites.h",
+ "public/BaseProfilerMarkerTypes.h",
+ "public/BaseProfilerRAIIMacro.h",
+ "public/BaseProfilerState.h",
+ "public/BaseProfilerUtils.h",
+ "public/FailureLatch.h",
+ "public/leb128iterator.h",
+ "public/ModuloBuffer.h",
+ "public/PowerOfTwo.h",
+ "public/ProfileBufferChunk.h",
+ "public/ProfileBufferChunkManager.h",
+ "public/ProfileBufferChunkManagerSingle.h",
+ "public/ProfileBufferChunkManagerWithLocalLimit.h",
+ "public/ProfileBufferControlledChunkManager.h",
+ "public/ProfileBufferEntryKinds.h",
+ "public/ProfileBufferEntrySerialization.h",
+ "public/ProfileBufferIndex.h",
+ "public/ProfileChunkedBuffer.h",
+ "public/ProfileChunkedBufferDetail.h",
+ "public/ProfilerBufferSize.h",
+ "public/ProgressLogger.h",
+ "public/ProportionValue.h",
+]
+
+UNIFIED_SOURCES += [
+ "core/BaseAndGeckoProfilerDetail.cpp",
+ "core/ProfilerUtils.cpp",
+]
+
+if CONFIG["MOZ_VTUNE"]:
+ DEFINES["MOZ_VTUNE_INSTRUMENTATION"] = True
+ UNIFIED_SOURCES += [
+ "core/VTuneProfiler.cpp",
+ ]
+
+FINAL_LIBRARY = "mozglue"
+
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+ CXXFLAGS += [
+ "-Wno-ignored-qualifiers", # due to use of breakpad headers
+ ]
+
+with Files("**"):
+ BUG_COMPONENT = ("Core", "Gecko Profiler")
diff --git a/mozglue/baseprofiler/public/BaseAndGeckoProfilerDetail.h b/mozglue/baseprofiler/public/BaseAndGeckoProfilerDetail.h
new file mode 100644
index 0000000000..f3cb539f64
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseAndGeckoProfilerDetail.h
@@ -0,0 +1,67 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Internal Base and Gecko Profiler utilities.
+// It should declare or define things that are used in both profilers, but not
+// needed outside of the profilers.
+// In particular, it is *not* included in popular headers like BaseProfiler.h
+// and GeckoProfiler.h, to avoid rebuilding the world when this is modified.
+
+#ifndef BaseAndGeckoProfilerDetail_h
+#define BaseAndGeckoProfilerDetail_h
+
+#include "mozilla/BaseProfilerUtils.h"
+#include "mozilla/Span.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Types.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+
+class ProfileBufferChunkManagerWithLocalLimit;
+
+// Centrally defines the version of the gecko profiler JSON format.
+const int GECKO_PROFILER_FORMAT_VERSION = 29;
+
+namespace baseprofiler::detail {
+
+[[nodiscard]] MFBT_API TimeStamp GetProfilingStartTime();
+
+[[nodiscard]] MFBT_API UniquePtr<ProfileBufferChunkManagerWithLocalLimit>
+ExtractBaseProfilerChunkManager();
+
+// If the current thread is registered, returns its registration time, otherwise
+// a null timestamp.
+[[nodiscard]] MFBT_API TimeStamp GetThreadRegistrationTime();
+
+} // namespace baseprofiler::detail
+
+namespace profiler::detail {
+
+// True if the filter is exactly "pid:<aPid>".
+[[nodiscard]] MFBT_API bool FilterHasPid(
+ const char* aFilter, baseprofiler::BaseProfilerProcessId aPid =
+ baseprofiler::profiler_current_process_id());
+
+// Only true if the filters only contain "pid:..." strings, and *none* of them
+// is exactly "pid:<aPid>". E.g.:
+// - [], 123 -> false (no pids)
+// - ["main"], 123 -> false (not all pids)
+// - ["main", "pid:123"], 123 -> false (not all pids)
+// - ["pid:123"], 123 -> false (all pids, including "pid:123")
+// - ["pid:123", "pid:456"], 123 -> false (all pids, including "pid:123")
+// - ["pid:456"], 123 -> true (all pids, but no "pid:123")
+// - ["pid:456", "pid:789"], 123 -> true (all pids, but no "pid:123")
+[[nodiscard]] MFBT_API bool FiltersExcludePid(
+ Span<const char* const> aFilters,
+ baseprofiler::BaseProfilerProcessId aPid =
+ baseprofiler::profiler_current_process_id());
+
+} // namespace profiler::detail
+
+} // namespace mozilla
+
+#endif // BaseAndGeckoProfilerDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfileJSONWriter.h b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h
new file mode 100644
index 0000000000..00a2926366
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h
@@ -0,0 +1,600 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BASEPROFILEJSONWRITER_H
+#define BASEPROFILEJSONWRITER_H
+
+#include "mozilla/FailureLatch.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/JSONWriter.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/ProgressLogger.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtrExtensions.h"
+
+#include <functional>
+#include <ostream>
+#include <string_view>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class SpliceableJSONWriter;
+
+// On average, profile JSONs are large enough such that we want to avoid
+// reallocating its buffer when expanding. Additionally, the contents of the
+// profile are not accessed until the profile is entirely written. For these
+// reasons we use a chunked writer that keeps an array of chunks, which is
+// concatenated together after writing is finished.
+class ChunkedJSONWriteFunc final : public JSONWriteFunc, public FailureLatch {
+ public:
+ friend class SpliceableJSONWriter;
+
+ explicit ChunkedJSONWriteFunc(FailureLatch& aFailureLatch)
+ : mFailureLatch(WrapNotNullUnchecked(&aFailureLatch)) {
+ (void)AllocChunk(kChunkSize);
+ }
+
+ [[nodiscard]] bool IsEmpty() const {
+ MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd && mChunkList.length() == 0 &&
+ mChunkLengths.length() == 0);
+ return !mChunkPtr;
+ }
+
+ // Length of data written so far, excluding null terminator.
+ [[nodiscard]] size_t Length() const {
+ MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+ size_t totalLen = 0;
+ for (size_t i = 0; i < mChunkLengths.length(); i++) {
+ MOZ_ASSERT(strlen(mChunkList[i].get()) == mChunkLengths[i]);
+ totalLen += mChunkLengths[i];
+ }
+ return totalLen;
+ }
+
+ void Write(const Span<const char>& aStr) final {
+ if (Failed()) {
+ return;
+ }
+
+ MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd);
+ MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back());
+ MOZ_ASSERT(*mChunkPtr == '\0');
+
+ // Most strings to be written are small, but subprocess profiles (e.g.,
+ // from the content process in e10s) may be huge. If the string is larger
+ // than a chunk, allocate its own chunk.
+ char* newPtr;
+ if (aStr.size() >= kChunkSize) {
+ if (!AllocChunk(aStr.size() + 1)) {
+ return;
+ }
+ newPtr = mChunkPtr + aStr.size();
+ } else {
+ newPtr = mChunkPtr + aStr.size();
+ if (newPtr >= mChunkEnd) {
+ if (!AllocChunk(kChunkSize)) {
+ return;
+ }
+ newPtr = mChunkPtr + aStr.size();
+ }
+ }
+
+ memcpy(mChunkPtr, aStr.data(), aStr.size());
+ *newPtr = '\0';
+ mChunkPtr = newPtr;
+ mChunkLengths.back() += aStr.size();
+ }
+
+ [[nodiscard]] bool CopyDataIntoLazilyAllocatedBuffer(
+ const std::function<char*(size_t)>& aAllocator) const {
+ // Give up early, without calling aAllocator, if Failed() reports a failure.
+ if (Failed()) {
+ return false;
+ }
+ // Request a buffer for the full content plus a null terminator.
+ char* ptr = aAllocator(Length() + 1);
+
+ if (!ptr) {
+ // Failed to allocate memory.
+ return false;
+ }
+
+ for (size_t i = 0; i < mChunkList.length(); i++) {
+ size_t len = mChunkLengths[i];
+ memcpy(ptr, mChunkList[i].get(), len);
+ ptr += len;
+ }
+ *ptr = '\0';
+ return true;
+ }
+
+ [[nodiscard]] UniquePtr<char[]> CopyData() const {
+ UniquePtr<char[]> c;
+ if (!CopyDataIntoLazilyAllocatedBuffer([&](size_t allocationSize) {
+ c = MakeUnique<char[]>(allocationSize);
+ return c.get();
+ })) {
+ // Something went wrong, make sure the returned pointer is null even if
+ // the allocation happened.
+ c = nullptr;
+ }
+ return c;
+ }
+
+ void Take(ChunkedJSONWriteFunc&& aOther) {
+ SetFailureFrom(aOther);
+ if (Failed()) {
+ return;
+ }
+
+ for (size_t i = 0; i < aOther.mChunkList.length(); i++) {
+ MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[i]));
+ MOZ_ALWAYS_TRUE(mChunkList.append(std::move(aOther.mChunkList[i])));
+ }
+ mChunkPtr = mChunkList.back().get() + mChunkLengths.back();
+ mChunkEnd = mChunkPtr;
+ aOther.Clear();
+ }
+
+ FAILURELATCH_IMPL_PROXY(*mFailureLatch)
+
+ // Change the failure latch to be used here, and if the previous latch was
+ // already in failure state, set that failure in the new latch.
+ // This allows using this WriteFunc in isolation, before attempting to bring
+ // it into another operation group with its own FailureLatch.
+ void ChangeFailureLatchAndForwardState(FailureLatch& aFailureLatch) {
+ aFailureLatch.SetFailureFrom(*this);
+ mFailureLatch = WrapNotNullUnchecked(&aFailureLatch);
+ }
+
+ private:
+ void Clear() {
+ mChunkPtr = nullptr;
+ mChunkEnd = nullptr;
+ mChunkList.clear();
+ mChunkLengths.clear();
+ }
+
+ void ClearAndSetFailure(std::string aFailure) {
+ Clear();
+ SetFailure(std::move(aFailure));
+ }
+
+ [[nodiscard]] bool ClearAndSetFailureAndFalse(std::string aFailure) {
+ ClearAndSetFailure(std::move(aFailure));
+ return false;
+ }
+
+ [[nodiscard]] bool AllocChunk(size_t aChunkSize) {
+ if (Failed()) {
+ if (mChunkPtr) {
+ // FailureLatch is in failed state, but chunks have not been cleared yet
+ // (error must have happened elsewhere).
+ Clear();
+ }
+ return false;
+ }
+
+ MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+ UniquePtr<char[]> newChunk = MakeUniqueFallible<char[]>(aChunkSize);
+ if (!newChunk) {
+ return ClearAndSetFailureAndFalse(
+ "OOM in ChunkedJSONWriteFunc::AllocChunk allocating new chunk");
+ }
+ mChunkPtr = newChunk.get();
+ mChunkEnd = mChunkPtr + aChunkSize;
+ *mChunkPtr = '\0';
+ if (!mChunkLengths.append(0)) {
+ return ClearAndSetFailureAndFalse(
+ "OOM in ChunkedJSONWriteFunc::AllocChunk appending length");
+ }
+ if (!mChunkList.append(std::move(newChunk))) {
+ return ClearAndSetFailureAndFalse(
+ "OOM in ChunkedJSONWriteFunc::AllocChunk appending new chunk");
+ }
+ return true;
+ }
+
+ static const size_t kChunkSize = 4096 * 512;
+
+ // Pointer for writing inside the current chunk.
+ //
+ // The current chunk is always at the back of mChunkList, i.e.,
+ // mChunkList.back() <= mChunkPtr <= mChunkEnd.
+ char* mChunkPtr = nullptr;
+
+ // Pointer to the end of the current chunk.
+ //
+ // The current chunk is always at the back of mChunkList, i.e.,
+ // mChunkEnd >= mChunkList.back() + mChunkLengths.back().
+ char* mChunkEnd = nullptr;
+
+ // List of chunks and their lengths.
+ //
+ // For all i, the length of the string in mChunkList[i] is
+ // mChunkLengths[i].
+ Vector<UniquePtr<char[]>> mChunkList;
+ Vector<size_t> mChunkLengths;
+
+ NotNull<FailureLatch*> mFailureLatch;
+};
+
+struct OStreamJSONWriteFunc final : public JSONWriteFunc {
+ explicit OStreamJSONWriteFunc(std::ostream& aStream) : mStream(aStream) {}
+
+ void Write(const Span<const char>& aStr) final {
+ std::string_view sv(aStr.data(), aStr.size());
+ mStream << sv;
+ }
+
+ std::ostream& mStream;
+};
+
+class UniqueJSONStrings;
+
+class SpliceableJSONWriter : public JSONWriter, public FailureLatch {
+ public:
+ SpliceableJSONWriter(JSONWriteFunc& aWriter, FailureLatch& aFailureLatch)
+ : JSONWriter(aWriter, JSONWriter::SingleLineStyle),
+ mFailureLatch(WrapNotNullUnchecked(&aFailureLatch)) {}
+
+ SpliceableJSONWriter(UniquePtr<JSONWriteFunc> aWriter,
+ FailureLatch& aFailureLatch)
+ : JSONWriter(std::move(aWriter), JSONWriter::SingleLineStyle),
+ mFailureLatch(WrapNotNullUnchecked(&aFailureLatch)) {}
+
+ void StartBareList() { StartCollection(scEmptyString, scEmptyString); }
+
+ void EndBareList() { EndCollection(scEmptyString); }
+
+ // Output a time (int64_t given in nanoseconds) in milliseconds, trimming
+ // trailing fractional zeroes. E.g.: 1'234'567'890 -> "1234.56789"
+ // `aMaybePropertyName` is forwarded unchanged to Scalar(), together with the
+ // formatted number. Every int64_t value is accepted, including INT64_MIN.
+ void TimeI64NsProperty(const Span<const char>& aMaybePropertyName,
+ int64_t aTime_ns) {
+ if (aTime_ns == 0) {
+ Scalar(aMaybePropertyName, MakeStringSpan("0"));
+ return;
+ }
+
+ static constexpr uint64_t million = 1'000'000;
+ // Compute the magnitude in unsigned arithmetic: std::abs(INT64_MIN) would
+ // overflow (undefined behavior), whereas unsigned negation is well-defined
+ // and yields 2^63 for INT64_MIN.
+ const uint64_t absNanos =
+ (aTime_ns >= 0) ? static_cast<uint64_t>(aTime_ns)
+ : uint64_t{0} - static_cast<uint64_t>(aTime_ns);
+ // uint64_t also matches the PRIu64 conversion used in snprintf below.
+ const uint64_t integerMilliseconds = absNanos / million;
+ auto remainderNanoseconds = static_cast<uint32_t>(absNanos % million);
+
+ // Plenty enough to fit INT64_MIN (-9223372036854775808).
+ static constexpr size_t DIGITS_MAX = 23;
+ char buf[DIGITS_MAX + 1];
+ // The sign is emitted through the format string, because the value passed
+ // to snprintf is the (unsigned) magnitude.
+ int len =
+ snprintf(buf, DIGITS_MAX, (aTime_ns >= 0) ? "%" PRIu64 : "-%" PRIu64,
+ integerMilliseconds);
+ if (remainderNanoseconds != 0) {
+ buf[len++] = '.';
+ // Output up to 6 fractional digits. Exit early if the rest would
+ // be trailing zeros.
+ uint32_t powerOfTen = static_cast<uint32_t>(million / 10);
+ for (;;) {
+ auto digit = remainderNanoseconds / powerOfTen;
+ buf[len++] = '0' + static_cast<char>(digit);
+ remainderNanoseconds %= powerOfTen;
+ if (remainderNanoseconds == 0) {
+ break;
+ }
+ powerOfTen /= 10;
+ if (powerOfTen == 0) {
+ break;
+ }
+ }
+ }
+
+ // buf is not null-terminated past the fraction; pass an explicit length.
+ Scalar(aMaybePropertyName, Span<const char>(buf, len));
+ }
+
+ // Output a (double) time in milliseconds, with at best nanosecond precision.
+ void TimeDoubleMsProperty(const Span<const char>& aMaybePropertyName,
+ double aTime_ms) {
+ const double dTime_ns = aTime_ms * 1'000'000.0;
+ // Make sure it's well within int64_t range.
+ // 2^63 nanoseconds is almost 300 years; these times are relative to
+ // firefox startup, this should be enough for most uses.
+ if (dTime_ns >= 0.0) {
+ MOZ_RELEASE_ASSERT(dTime_ns < double(INT64_MAX - 1));
+ } else {
+ MOZ_RELEASE_ASSERT(dTime_ns > double(INT64_MIN + 2));
+ }
+ // Round to nearest integer nanosecond. The conversion to integer truncates
+ // the fractional part, so first we need to push it 0.5 away from zero.
+ const int64_t iTime_ns =
+ (dTime_ns >= 0.0) ? int64_t(dTime_ns + 0.5) : int64_t(dTime_ns - 0.5);
+ TimeI64NsProperty(aMaybePropertyName, iTime_ns);
+ }
+
+ // Output a (double) time in milliseconds, with at best nanosecond precision.
+ void TimeDoubleMsElement(double aTime_ms) {
+ TimeDoubleMsProperty(nullptr, aTime_ms);
+ }
+
+ // This function must be used to correctly stream timestamps in profiles.
+ // Null timestamps don't output anything.
+ void TimeProperty(const Span<const char>& aMaybePropertyName,
+ const TimeStamp& aTime) {
+ if (!aTime.IsNull()) {
+ TimeDoubleMsProperty(
+ aMaybePropertyName,
+ (aTime - TimeStamp::ProcessCreation()).ToMilliseconds());
+ }
+ }
+
+ void NullElements(uint32_t aCount) {
+ for (uint32_t i = 0; i < aCount; i++) {
+ NullElement();
+ }
+ }
+
+ void Splice(const Span<const char>& aStr) {
+ Separator();
+ WriteFunc().Write(aStr);
+ mNeedComma[mDepth] = true;
+ }
+
+ void Splice(const char* aStr, size_t aLen) {
+ Separator();
+ WriteFunc().Write(Span<const char>(aStr, aLen));
+ mNeedComma[mDepth] = true;
+ }
+
+ // Splice the given JSON directly in, without quoting.
+ void SplicedJSONProperty(const Span<const char>& aMaybePropertyName,
+ const Span<const char>& aJsonValue) {
+ Scalar(aMaybePropertyName, aJsonValue);
+ }
+
+ void CopyAndSplice(const ChunkedJSONWriteFunc& aFunc) {
+ Separator();
+ for (size_t i = 0; i < aFunc.mChunkList.length(); i++) {
+ WriteFunc().Write(
+ Span<const char>(aFunc.mChunkList[i].get(), aFunc.mChunkLengths[i]));
+ }
+ mNeedComma[mDepth] = true;
+ }
+
+ // Writes the chunks held by aFunc into this writer, then empties aFunc.
+ // This base implementation cannot adopt the chunks (the underlying writer
+ // may be e.g. an OStreamJSONWriteFunc), so each chunk is copied through
+ // WriteFunc() before aFunc's storage is cleared.
+ virtual void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) {
+ Separator();
+ for (size_t chunk = 0; chunk < aFunc.mChunkList.length(); ++chunk) {
+ const Span<const char> contents(aFunc.mChunkList[chunk].get(),
+ aFunc.mChunkLengths[chunk]);
+ WriteFunc().Write(contents);
+ }
+ // ChunkedJSONWriteFunc::Clear() nulls both chunk pointers and empties both
+ // vectors; it is private but reachable here through friendship.
+ aFunc.Clear();
+ mNeedComma[mDepth] = true;
+ }
+
+ // Set the pointer to a UniqueJSONStrings; none must be set already.
+ void SetUniqueStrings(UniqueJSONStrings& aUniqueStrings) {
+ MOZ_RELEASE_ASSERT(!mUniqueStrings);
+ mUniqueStrings = &aUniqueStrings;
+ }
+
+ // Clear the pointer to a UniqueJSONStrings; one must currently be set.
+ void ResetUniqueStrings() {
+ MOZ_RELEASE_ASSERT(mUniqueStrings);
+ mUniqueStrings = nullptr;
+ }
+
+ // Add `aStr` to the unique-strings list (if not already there), and write its
+ // index as a named object property.
+ inline void UniqueStringProperty(const Span<const char>& aName,
+ const Span<const char>& aStr);
+
+ // Add `aStr` to the unique-strings list (if not already there), and write its
+ // index as an array element.
+ inline void UniqueStringElement(const Span<const char>& aStr);
+
+ // The following functions override JSONWriter functions non-virtually. The
+ // goal is to try and prevent calls that specify a style, which would be
+ // ignored anyway because the whole thing is single-lined. It's fine if some
+ // calls still make it through a `JSONWriter&`, no big deal.
+ void Start() { JSONWriter::Start(); }
+ void StartArrayProperty(const Span<const char>& aName) {
+ JSONWriter::StartArrayProperty(aName);
+ }
+ template <size_t N>
+ void StartArrayProperty(const char (&aName)[N]) {
+ JSONWriter::StartArrayProperty(Span<const char>(aName, N));
+ }
+ void StartArrayElement() { JSONWriter::StartArrayElement(); }
+ void StartObjectProperty(const Span<const char>& aName) {
+ JSONWriter::StartObjectProperty(aName);
+ }
+ template <size_t N>
+ void StartObjectProperty(const char (&aName)[N]) {
+ JSONWriter::StartObjectProperty(Span<const char>(aName, N));
+ }
+ void StartObjectElement() { JSONWriter::StartObjectElement(); }
+
+ FAILURELATCH_IMPL_PROXY(*mFailureLatch)
+
+ protected:
+ NotNull<FailureLatch*> mFailureLatch;
+
+ private:
+ UniqueJSONStrings* mUniqueStrings = nullptr;
+};
+
+class SpliceableChunkedJSONWriter final : public SpliceableJSONWriter {
+ public:
+ explicit SpliceableChunkedJSONWriter(FailureLatch& aFailureLatch)
+ : SpliceableJSONWriter(MakeUnique<ChunkedJSONWriteFunc>(aFailureLatch),
+ aFailureLatch) {}
+
+ // Access the ChunkedJSONWriteFunc as reference-to-const, usually to copy data
+ // out.
+ const ChunkedJSONWriteFunc& ChunkedWriteFunc() const {
+ return ChunkedWriteFuncRef();
+ }
+
+ // Access the ChunkedJSONWriteFunc as rvalue-reference, usually to take its
+ // data out. This writer shouldn't be used anymore after this.
+ ChunkedJSONWriteFunc&& TakeChunkedWriteFunc() {
+ ChunkedJSONWriteFunc& ref = ChunkedWriteFuncRef();
+#ifdef DEBUG
+ mTaken = true; // Later ChunkedWriteFuncRef() calls MOZ_ASSERT(!mTaken).
+#endif // DEBUG
+ return std::move(ref);
+ }
+
+ // Adopts the chunks from aFunc without copying.
+ void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) override {
+ MOZ_ASSERT(!mTaken);
+ Separator();
+ ChunkedWriteFuncRef().Take(std::move(aFunc));
+ mNeedComma[mDepth] = true;
+ }
+
+ void ChangeFailureLatchAndForwardState(FailureLatch& aFailureLatch) {
+ mFailureLatch = WrapNotNullUnchecked(&aFailureLatch);
+ return ChunkedWriteFuncRef().ChangeFailureLatchAndForwardState(
+ aFailureLatch);
+ }
+
+ private:
+ const ChunkedJSONWriteFunc& ChunkedWriteFuncRef() const {
+ MOZ_ASSERT(!mTaken);
+ // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the
+ // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc&.
+ return static_cast<const ChunkedJSONWriteFunc&>(WriteFunc());
+ }
+
+ ChunkedJSONWriteFunc& ChunkedWriteFuncRef() {
+ MOZ_ASSERT(!mTaken);
+ // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the
+ // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc&.
+ return static_cast<ChunkedJSONWriteFunc&>(WriteFunc());
+ }
+
+#ifdef DEBUG
+ bool mTaken = false;
+#endif
+};
+
+class JSONSchemaWriter {
+ JSONWriter& mWriter;
+ uint32_t mIndex;
+
+ public:
+ explicit JSONSchemaWriter(JSONWriter& aWriter) : mWriter(aWriter), mIndex(0) {
+ aWriter.StartObjectProperty("schema",
+ SpliceableJSONWriter::SingleLineStyle);
+ }
+
+ void WriteField(const Span<const char>& aName) {
+ mWriter.IntProperty(aName, mIndex++);
+ }
+
+ template <size_t Np1>
+ void WriteField(const char (&aName)[Np1]) {
+ WriteField(Span<const char>(aName, Np1 - 1));
+ }
+
+ ~JSONSchemaWriter() { mWriter.EndObject(); }
+};
+
+// This class helps create an indexed list of unique strings, and inserts the
+// index as a JSON value. The collected list of unique strings can later be
+// inserted as a JSON array.
+// This can be useful for elements/properties with many repeated strings.
+//
+// With only JSONWriter w,
+// `w.WriteElement("a"); w.WriteElement("b"); w.WriteElement("a");`
+// when done inside a JSON array, will generate:
+// `["a", "b", "a"]`
+//
+// With UniqueStrings u,
+// `u.WriteElement(w, "a"); u.WriteElement(w, "b"); u.WriteElement(w, "a");`
+// when done inside a JSON array, will generate:
+// `[0, 1, 0]`
+// and later, `u.SpliceStringTableElements(w)` (inside a JSON array), will
+// output the corresponding indexed list of unique strings:
+// `["a", "b"]`
+class UniqueJSONStrings final : public FailureLatch {
+ public:
+ // Start an empty list of unique strings.
+ MFBT_API explicit UniqueJSONStrings(FailureLatch& aFailureLatch);
+
+ // Start with a copy of the strings from another list.
+ MFBT_API UniqueJSONStrings(FailureLatch& aFailureLatch,
+ const UniqueJSONStrings& aOther,
+ ProgressLogger aProgressLogger);
+
+ MFBT_API ~UniqueJSONStrings();
+
+ // Add `aStr` to the list (if not already there), and write its index as a
+ // named object property.
+ void WriteProperty(SpliceableJSONWriter& aWriter,
+ const Span<const char>& aName,
+ const Span<const char>& aStr) {
+ if (const Maybe<uint32_t> maybeIndex = GetOrAddIndex(aStr); maybeIndex) {
+ aWriter.IntProperty(aName, *maybeIndex);
+ } else {
+ aWriter.SetFailureFrom(*this);
+ }
+ }
+
+ // Add `aStr` to the list (if not already there), and write its index as an
+ // array element.
+ void WriteElement(SpliceableJSONWriter& aWriter,
+ const Span<const char>& aStr) {
+ if (const Maybe<uint32_t> maybeIndex = GetOrAddIndex(aStr); maybeIndex) {
+ aWriter.IntElement(*maybeIndex);
+ } else if (!aWriter.Failed()) {
+ aWriter.SetFailureFrom(*this);
+ }
+ }
+
+ // Splice all collected unique strings into an array. This should only be done
+ // once, and then this UniqueStrings shouldn't be used anymore.
+ MFBT_API void SpliceStringTableElements(SpliceableJSONWriter& aWriter);
+
+ FAILURELATCH_IMPL_PROXY(mStringTableWriter)
+
+ void ChangeFailureLatchAndForwardState(FailureLatch& aFailureLatch) {
+ mStringTableWriter.ChangeFailureLatchAndForwardState(aFailureLatch);
+ }
+
+ private:
+ MFBT_API void ClearAndSetFailure(std::string aFailure);
+
+ // If `aStr` is already listed, return its index.
+ // Otherwise add it to the list and return the new index.
+ MFBT_API Maybe<uint32_t> GetOrAddIndex(const Span<const char>& aStr);
+
+ SpliceableChunkedJSONWriter mStringTableWriter;
+ HashMap<HashNumber, uint32_t> mStringHashToIndexMap;
+};
+
+void SpliceableJSONWriter::UniqueStringProperty(const Span<const char>& aName,
+ const Span<const char>& aStr) {
+ MOZ_RELEASE_ASSERT(mUniqueStrings);
+ mUniqueStrings->WriteProperty(*this, aName, aStr);
+}
+
+// Add `aStr` to the list (if not already there), and write its index as an
+// array element.
+void SpliceableJSONWriter::UniqueStringElement(const Span<const char>& aStr) {
+ MOZ_RELEASE_ASSERT(mUniqueStrings);
+ mUniqueStrings->WriteElement(*this, aStr);
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // BASEPROFILEJSONWRITER_H
diff --git a/mozglue/baseprofiler/public/BaseProfiler.h b/mozglue/baseprofiler/public/BaseProfiler.h
new file mode 100644
index 0000000000..b66ef61126
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfiler.h
@@ -0,0 +1,506 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The Gecko Profiler is an always-on profiler that takes fast and low overhead
+// samples of the program execution using only userspace functionality for
+// portability. The goal of this module is to provide performance data in a
+// generic cross-platform way without requiring custom tools or kernel support.
+//
+// Samples are collected to form a timeline with optional timeline event
+// (markers) used for filtering. The samples include both native stacks and
+// platform-independent "label stack" frames.
+
+#ifndef BaseProfiler_h
+#define BaseProfiler_h
+
+// This file is safe to include unconditionally, and only defines
+// empty macros if MOZ_GECKO_PROFILER is not set.
+
+// These headers are also safe to include unconditionally, with empty macros if
+// MOZ_GECKO_PROFILER is not set.
+// If your file only uses particular APIs (e.g., only markers), please consider
+// including only the needed headers instead of this one, to reduce compilation
+// dependencies.
+#include "mozilla/BaseProfilerCounts.h"
+#include "mozilla/BaseProfilerLabels.h"
+#include "mozilla/BaseProfilerMarkers.h"
+#include "mozilla/BaseProfilerState.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+# include "mozilla/UniquePtr.h"
+
+// This file can be #included unconditionally. However, everything within this
+// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the
+// following macros and functions, which encapsulate the most common operations
+// and thus avoid the need for many #ifdefs.
+
+# define AUTO_BASE_PROFILER_INIT \
+ ::mozilla::baseprofiler::profiler_init_main_thread_id()
+
+# define BASE_PROFILER_REGISTER_THREAD(name)
+# define BASE_PROFILER_UNREGISTER_THREAD()
+# define AUTO_BASE_PROFILER_REGISTER_THREAD(name)
+
+# define AUTO_BASE_PROFILER_THREAD_SLEEP
+# define AUTO_BASE_PROFILER_THREAD_WAKE
+
+// Function stubs for when MOZ_GECKO_PROFILER is not defined.
+
+namespace mozilla {
+
+namespace baseprofiler {
+// This won't be used, it's just there to allow the empty definition of
+// `profiler_get_backtrace`.
+struct ProfilerBacktrace {};
+using UniqueProfilerBacktrace = UniquePtr<ProfilerBacktrace>;
+
+// Get/Capture-backtrace functions can return nullptr or false, the result
+// should be fed to another empty macro or stub anyway.
+
+static inline UniqueProfilerBacktrace profiler_get_backtrace() {
+ return nullptr;
+}
+
+static inline bool profiler_capture_backtrace_into(
+ ProfileChunkedBuffer& aChunkedBuffer, StackCaptureOptions aCaptureOptions) {
+ return false;
+}
+
+static inline UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+ return nullptr;
+}
+
+static inline void profiler_init(void* stackTop) {}
+
+static inline void profiler_shutdown() {}
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#else // !MOZ_GECKO_PROFILER
+
+# include "BaseProfilingStack.h"
+
+# include "mozilla/Assertions.h"
+# include "mozilla/Atomics.h"
+# include "mozilla/Attributes.h"
+# include "mozilla/BaseProfilerRAIIMacro.h"
+# include "mozilla/Maybe.h"
+# include "mozilla/PowerOfTwo.h"
+# include "mozilla/TimeStamp.h"
+# include "mozilla/UniquePtr.h"
+
+# include <functional>
+# include <stdint.h>
+# include <string>
+
+namespace mozilla {
+
+class MallocAllocPolicy;
+class ProfileChunkedBuffer;
+enum class StackCaptureOptions;
+template <class T, size_t MinInlineCapacity, class AllocPolicy>
+class Vector;
+
+namespace baseprofiler {
+
+class ProfilerBacktrace;
+class SpliceableJSONWriter;
+
+//---------------------------------------------------------------------------
+// Start and stop the profiler
+//---------------------------------------------------------------------------
+
+static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_ENTRIES =
+# if !defined(GP_PLAT_arm_android)
+ MakePowerOfTwo32<16 * 1024 * 1024>(); // 16M entries = 128MiB
+# else
+ MakePowerOfTwo32<4 * 1024 * 1024>(); // 4M entries = 32MiB
+# endif
+
+// Startup profiling usually needs to capture more data, especially on slow
+// systems.
+// Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
+// https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java
+static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_STARTUP_ENTRIES =
+# if !defined(GP_PLAT_arm_android)
+ mozilla::MakePowerOfTwo32<64 * 1024 * 1024>(); // 64M entries = 512MiB
+# else
+ mozilla::MakePowerOfTwo32<16 * 1024 * 1024>(); // 16M entries = 128MiB
+# endif
+
+// Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
+// https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java
+# define BASE_PROFILER_DEFAULT_INTERVAL 1 /* millisecond */
+# define BASE_PROFILER_MAX_INTERVAL 5000 /* milliseconds */
+
+// Initialize the profiler. If MOZ_PROFILER_STARTUP is set the profiler will
+// also be started. This call must happen before any other profiler calls
+// (except profiler_start(), which will call profiler_init() if it hasn't
+// already run).
+MFBT_API void profiler_init(void* stackTop);
+
+# define AUTO_BASE_PROFILER_INIT \
+ ::mozilla::baseprofiler::AutoProfilerInit PROFILER_RAII
+
+// Clean up the profiler module, stopping it if required. This function may
+// also save a shutdown profile if requested. No profiler calls should happen
+// after this point and all profiling stack labels should have been popped.
+MFBT_API void profiler_shutdown();
+
+// Start the profiler -- initializing it first if necessary -- with the
+// selected options. Stops and restarts the profiler if it is already active.
+// After starting the profiler is "active". The samples will be recorded in a
+// circular buffer.
+// "aCapacity" is the maximum number of 8-byte entries in the profiler's
+// circular buffer.
+// "aInterval" is the sampling interval, measured in milliseconds.
+// "aFeatures" is the feature set. Features unsupported by this
+// platform/configuration are ignored.
+// "aFilters" is the list of thread filters. Threads that do not match any
+// of the filters are not profiled. A filter matches a thread if
+// (a) the thread name contains the filter as a case-insensitive
+// substring, or
+// (b) the filter is of the form "pid:<n>" where n is the process
+// id of the process that the thread is running in.
+// "aDuration" is the duration of entries in the profiler's circular buffer.
+MFBT_API void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+ uint32_t aFeatures, const char** aFilters,
+ uint32_t aFilterCount,
+ const Maybe<double>& aDuration = Nothing());
+
+// Stop the profiler and discard the profile without saving it. A no-op if the
+// profiler is inactive. After stopping the profiler is "inactive".
+MFBT_API void profiler_stop();
+
+// If the profiler is inactive, start it. If it's already active, restart it if
+// the requested settings differ from the current settings. Both the check and
+// the state change are performed while the profiler state is locked.
+// The only difference to profiler_start is that the current buffer contents are
+// not discarded if the profiler is already running with the requested settings.
+MFBT_API void profiler_ensure_started(
+ PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+ const char** aFilters, uint32_t aFilterCount,
+ const Maybe<double>& aDuration = Nothing());
+
+//---------------------------------------------------------------------------
+// Control the profiler
+//---------------------------------------------------------------------------
+
+// Register/unregister threads with the profiler. Both functions operate the
+// same whether the profiler is active or inactive.
+# define BASE_PROFILER_REGISTER_THREAD(name) \
+ do { \
+ char stackTop; \
+ ::mozilla::baseprofiler::profiler_register_thread(name, &stackTop); \
+ } while (0)
+# define BASE_PROFILER_UNREGISTER_THREAD() \
+ ::mozilla::baseprofiler::profiler_unregister_thread()
+MFBT_API ProfilingStack* profiler_register_thread(const char* name,
+ void* guessStackTop);
+MFBT_API void profiler_unregister_thread();
+
+// Registers a DOM Window (the JS global `window`) with the profiler. Each
+// Window _roughly_ corresponds to a single document loaded within a
+// browsing context. Both the Window Id and Browser Id are recorded to allow
+// correlating different Windows loaded within the same tab or frame element.
+//
+// We register a page for each navigation, but we do not register
+// history.pushState or history.replaceState since they correspond to the same
+// Inner Window ID. When a browsing context is first loaded, the first url
+// loaded in it will be about:blank. Because of that, this call keeps the first
+// non-about:blank registration of window and discards the previous one.
+//
+//   "aTabID"                 is the BrowserId that the document belongs to.
+//                            It's used to determine the tab of that page.
+//   "aInnerWindowID"         is the ID of the `window` global object of that
+//                            document.
+//   "aUrl"                   is the URL of the page.
+//   "aEmbedderInnerWindowID" is the inner window id of the embedder. It's used
+//                            to determine sub documents of a page.
+MFBT_API void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
+                                     const std::string& aUrl,
+                                     uint64_t aEmbedderInnerWindowID);
+
+// Unregister page with the profiler.
+//
+// Takes an Inner Window ID and unregisters the page entry that has the same ID.
+MFBT_API void profiler_unregister_page(uint64_t aRegisteredInnerWindowID);
+
+// Remove all registered and unregistered pages in the profiler.
+void profiler_clear_all_pages();
+
+class BaseProfilerCount;
+MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+// Register and unregister a thread within a scope.
+# define AUTO_BASE_PROFILER_REGISTER_THREAD(name) \
+ ::mozilla::baseprofiler::AutoProfilerRegisterThread PROFILER_RAII(name)
+
+// Pause and resume the profiler. No-ops if the profiler is inactive. While
+// paused the profile will not take any samples and will not record any data
+// into its buffers. The profiler remains fully initialized in this state.
+// This feature will keep JavaScript profiling enabled, thus allowing toggling
+// the profiler without invalidating the JIT.
+MFBT_API void profiler_pause();
+MFBT_API void profiler_resume();
+
+// Only pause and resume the periodic sampling loop, including stack sampling,
+// counters, and profiling overheads.
+MFBT_API void profiler_pause_sampling();
+MFBT_API void profiler_resume_sampling();
+
+// These functions tell the profiler that a thread went to sleep so that we can
+// avoid sampling it while it's sleeping. Calling profiler_thread_sleep()
+// twice without an intervening profiler_thread_wake() is an error. All three
+// functions operate the same whether the profiler is active or inactive.
+MFBT_API void profiler_thread_sleep();
+MFBT_API void profiler_thread_wake();
+
+// Mark a thread as asleep/awake within a scope.
+# define AUTO_BASE_PROFILER_THREAD_SLEEP \
+ ::mozilla::baseprofiler::AutoProfilerThreadSleep PROFILER_RAII
+# define AUTO_BASE_PROFILER_THREAD_WAKE \
+ ::mozilla::baseprofiler::AutoProfilerThreadWake PROFILER_RAII
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Get the params used to start the profiler. Returns 0 and an empty vector
+// (via outparams) if the profiler is inactive. It's possible that the features
+// returned may be slightly different to those requested due to required
+// adjustments.
+MFBT_API void profiler_get_start_params(
+ int* aEntrySize, Maybe<double>* aDuration, double* aInterval,
+ uint32_t* aFeatures, Vector<const char*, 0, MallocAllocPolicy>* aFilters);
+
+// The number of milliseconds since the process started. Operates the same
+// whether the profiler is active or inactive.
+MFBT_API double profiler_time();
+
+// An object of this class is passed to profiler_suspend_and_sample_thread().
+// For each stack frame, one of the Collect methods will be called.
+class ProfilerStackCollector {
+ public:
+ // Some collectors need to worry about possibly overwriting previous
+ // generations of data. If that's not an issue, this can return Nothing,
+ // which is the default behaviour.
+ virtual Maybe<uint64_t> SamplePositionInBuffer() { return Nothing(); }
+ virtual Maybe<uint64_t> BufferRangeStart() { return Nothing(); }
+
+ // This method will be called once if the thread being suspended is the main
+ // thread. Default behaviour is to do nothing.
+ virtual void SetIsMainThread() {}
+
+ // WARNING: The target thread is suspended when the Collect methods are
+ // called. Do not try to allocate or acquire any locks, or you could
+ // deadlock. The target thread will have resumed by the time this function
+ // returns.
+
+ virtual void CollectNativeLeafAddr(void* aAddr) = 0;
+
+ virtual void CollectProfilingStackFrame(
+ const ProfilingStackFrame& aFrame) = 0;
+};
+
+// This method suspends the thread identified by aThreadId, samples its
+// profiling stack, JS stack, and (optionally) native stack, passing the
+// collected frames into aCollector. aFeatures dictates which compiler features
+// are used. |Leaf| is the only relevant one.
+// Use `aThreadId`=0 to sample the current thread.
+MFBT_API void profiler_suspend_and_sample_thread(
+ int aThreadId, uint32_t aFeatures, ProfilerStackCollector& aCollector,
+ bool aSampleNative = true);
+
+struct ProfilerBacktraceDestructor {
+ MFBT_API void operator()(ProfilerBacktrace*);
+};
+
+using UniqueProfilerBacktrace =
+ UniquePtr<ProfilerBacktrace, ProfilerBacktraceDestructor>;
+
+// Immediately capture the current thread's call stack, store it in the provided
+// buffer (usually to avoid allocations if you can construct the buffer on the
+// stack). Returns false if unsuccessful, if the profiler is inactive, or if
+// aCaptureOptions is NoStack.
+MFBT_API bool profiler_capture_backtrace_into(
+ ProfileChunkedBuffer& aChunkedBuffer, StackCaptureOptions aCaptureOptions);
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfileChunkedBuffer (usually for later use in MarkerStack::TakeBacktrace()).
+// May be null if unsuccessful, or if the profiler is inactive.
+MFBT_API UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace();
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfilerBacktrace (usually for later use in marker functions that take a
+// ProfilerBacktrace). May be null if unsuccessful, or if the profiler is
+// inactive.
+MFBT_API UniqueProfilerBacktrace profiler_get_backtrace();
+
+// Running statistics (count, sum, minimum, maximum) over a series of values.
+struct ProfilerStats {
+  unsigned n = 0;
+  double sum = 0;
+  // Starts at the largest finite double, so that any smaller value replaces it.
+  double min = std::numeric_limits<double>::max();
+  double max = 0;
+
+  // Folds one more value `v` into the statistics.
+  void Count(double v) {
+    n += 1;
+    sum += v;
+    min = (v < min) ? v : min;
+    max = (v > max) ? v : max;
+  }
+};
+
+struct ProfilerBufferInfo {
+ // Index of the oldest entry.
+ uint64_t mRangeStart;
+ // Index of the newest entry.
+ uint64_t mRangeEnd;
+ // Buffer capacity in number of 8-byte entries.
+ uint32_t mEntryCount;
+ // Sampling stats: Interval (us) between successive samplings.
+ ProfilerStats mIntervalsUs;
+ // Sampling stats: Total duration (us) of each sampling. (Split detail below.)
+ ProfilerStats mOverheadsUs;
+ // Sampling stats: Time (us) to acquire the lock before sampling.
+ ProfilerStats mLockingsUs;
+ // Sampling stats: Time (us) to discard expired data.
+ ProfilerStats mCleaningsUs;
+ // Sampling stats: Time (us) to collect counter data.
+ ProfilerStats mCountersUs;
+ // Sampling stats: Time (us) to sample thread stacks.
+ ProfilerStats mThreadsUs;
+};
+
+// Get information about the current buffer status.
+// Returns Nothing() if the profiler is inactive.
+//
+// This information may be useful to a user-interface displaying the current
+// status of the profiler, allowing the user to get a sense for how fast the
+// buffer is being written to, and how much data is visible.
+MFBT_API Maybe<ProfilerBufferInfo> profiler_get_buffer_info();
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+namespace mozilla {
+namespace baseprofiler {
+
+//---------------------------------------------------------------------------
+// Put profiling data into the profiler (markers)
+//---------------------------------------------------------------------------
+
+MFBT_API void profiler_add_js_marker(const char* aMarkerName,
+ const char* aMarkerText);
+
+//---------------------------------------------------------------------------
+// Output profiles
+//---------------------------------------------------------------------------
+
+// Set a user-friendly process name, used in JSON stream.
+MFBT_API void profiler_set_process_name(const std::string& aProcessName,
+ const std::string* aETLDplus1);
+
+// Get the profile encoded as a JSON string. A no-op (returning nullptr) if the
+// profiler is inactive.
+// If aIsShuttingDown is true, the current time is included as the process
+// shutdown time in the JSON's "meta" object.
+MFBT_API UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0,
+ bool aIsShuttingDown = false,
+ bool aOnlyThreads = false);
+
+// Write the profile for this process (excluding subprocesses) into aWriter.
+// Returns false if the profiler is inactive.
+MFBT_API bool profiler_stream_json_for_this_process(
+ SpliceableJSONWriter& aWriter, double aSinceTime = 0,
+ bool aIsShuttingDown = false, bool aOnlyThreads = false);
+
+// Get the profile and write it into a file. A no-op if the profiler is
+// inactive.
+// Prefixed with "base" to avoid clashing with Gecko Profiler's extern "C"
+// profiler_save_profile_to_file when called from debugger.
+MFBT_API void baseprofiler_save_profile_to_file(const char* aFilename);
+
+//---------------------------------------------------------------------------
+// RAII classes
+//---------------------------------------------------------------------------
+
+class MOZ_RAII AutoProfilerInit {
+ public:
+  // Calls profiler_init(), passing this object's address as `stackTop`.
+  explicit AutoProfilerInit() { profiler_init(this); }
+
+  // Calls profiler_shutdown() when the enclosing scope ends.
+  ~AutoProfilerInit() { profiler_shutdown(); }
+};
+
+// Convenience class to register and unregister a thread with the profiler.
+// Needs to be the first object on the stack of the thread.
+class MOZ_RAII AutoProfilerRegisterThread final {
+ public:
+ // Registers the thread under `aName`, passing this object's address as the
+ // guessed stack top (hence the requirement to be first on the stack).
+ explicit AutoProfilerRegisterThread(const char* aName) {
+ profiler_register_thread(aName, this);
+ }
+
+ // Unregisters the thread when the enclosing scope ends.
+ ~AutoProfilerRegisterThread() { profiler_unregister_thread(); }
+
+ private:
+ // Copying is disabled.
+ AutoProfilerRegisterThread(const AutoProfilerRegisterThread&) = delete;
+ AutoProfilerRegisterThread& operator=(const AutoProfilerRegisterThread&) =
+ delete;
+};
+
+class MOZ_RAII AutoProfilerThreadSleep {
+ public:
+  // Calls profiler_thread_sleep() on entry...
+  explicit AutoProfilerThreadSleep() { profiler_thread_sleep(); }
+
+  // ...and profiler_thread_wake() when the enclosing scope ends.
+  ~AutoProfilerThreadSleep() { profiler_thread_wake(); }
+};
+
+// Temporarily wake up the profiling of a thread while servicing events such as
+// Asynchronous Procedure Calls (APCs).
+class MOZ_RAII AutoProfilerThreadWake {
+ public:
+  // If the thread is reported as sleeping, wake it and remember that we did.
+  explicit AutoProfilerThreadWake()
+      : mIssuedWake(profiler_thread_is_sleeping()) {
+    if (!mIssuedWake) {
+      return;
+    }
+    profiler_thread_wake();
+  }
+
+  // Put the thread back to sleep, but only if the constructor woke it.
+  ~AutoProfilerThreadWake() {
+    if (!mIssuedWake) {
+      return;
+    }
+    MOZ_ASSERT(!profiler_thread_is_sleeping());
+    profiler_thread_sleep();
+  }
+
+ private:
+  // True if the constructor called profiler_thread_wake().
+  bool mIssuedWake;
+};
+
+// Get the MOZ_PROFILER_STARTUP* environment variables that should be
+// supplied to a child process that is about to be launched, in order
+// to make that child process start with the same profiler settings as
+// in the current process. The given function is invoked once for
+// each variable to be set.
+MFBT_API void GetProfilerEnvVarsForChildProcess(
+ std::function<void(const char* key, const char* value)>&& aSetEnv);
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // !MOZ_GECKO_PROFILER
+
+#endif // BaseProfiler_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerCounts.h b/mozglue/baseprofiler/public/BaseProfilerCounts.h
new file mode 100644
index 0000000000..b25aa7215b
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerCounts.h
@@ -0,0 +1,281 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerCounts_h
+#define BaseProfilerCounts_h
+
+#ifndef MOZ_GECKO_PROFILER
+
+// No-op versions of the counter macros, used when MOZ_GECKO_PROFILER is not
+// defined. Each parameter list mirrors the real macro of the same name defined
+// further down, so that call sites compile in both configurations.
+#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description)
+#  define BASE_PROFILER_DEFINE_COUNT(label, category, description)
+#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description)
+#  define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count)
+#  define AUTO_BASE_PROFILER_COUNT(label, count)
+#  define AUTO_BASE_PROFILER_STATIC_COUNT(label, count)
+#  define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label)
+
+#else
+
+# include "mozilla/Assertions.h"
+# include "mozilla/Atomics.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+class BaseProfilerCount;
+MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+typedef Atomic<int64_t, MemoryOrdering::Relaxed> ProfilerAtomicSigned;
+typedef Atomic<uint64_t, MemoryOrdering::Relaxed> ProfilerAtomicUnsigned;
+
+// Counter support
+// There are two types of counters:
+// 1) a simple counter which can be added to or subtracted from. This could
+// track the number of objects of a type, the number of calls to something
+// (reflow, JIT, etc).
+// 2) a combined counter which has the above, plus a number-of-calls counter
+// that is incremented by 1 for each call to modify the count. This provides
+// an optional source for a 'heatmap' of access. This can be used (for
+// example) to track the amount of memory allocated, and provide a heatmap of
+// memory operations (allocs/frees).
+//
+// Counters are sampled by the profiler once per sample-period. At this time,
+// all counters are global to the process. In the future, there might be more
+// versions with per-thread or other discriminators.
+//
+// Typical usage:
+// There are two ways to use counters: With heap-created counter objects,
+// or using macros. Note: the macros use statics, and will be slightly
+// faster/smaller, and you need to care about creating them before using
+// them. They're similar to the use-pattern for the other AUTO_PROFILER*
+// macros, but they do need the BASE_PROFILER_DEFINE* to be used to
+// instantiate the statics.
+//
+// BASE_PROFILER_DEFINE_COUNT(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() {
+//   ...
+//   AUTO_BASE_PROFILER_COUNT(mything, number_of_bytes_used);
+//   ...
+// }
+//
+// or (to also get a heatmap)
+//
+// BASE_PROFILER_DEFINE_COUNT_TOTAL(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() {
+//   ...
+//   AUTO_BASE_PROFILER_COUNT_TOTAL(mything, number_of_bytes_generated);
+//   ...
+// }
+//
+// To use without statics/macros:
+//
+// UniquePtr<ProfilerCounter> myCounter;
+// ...
+// myCounter =
+//     MakeUnique<ProfilerCounter>("mything", "JIT", "Some JIT byte count");
+// ...
+// void foo() { ... myCounter->Add(number_of_bytes_generated); ... }
+
+class BaseProfilerCount {
+ public:
+ BaseProfilerCount(const char* aLabel, ProfilerAtomicSigned* aCounter,
+ ProfilerAtomicUnsigned* aNumber, const char* aCategory,
+ const char* aDescription)
+ : mLabel(aLabel),
+ mCategory(aCategory),
+ mDescription(aDescription),
+ mCounter(aCounter),
+ mNumber(aNumber) {
+# define COUNTER_CANARY 0xDEADBEEF
+# ifdef DEBUG
+ mCanary = COUNTER_CANARY;
+ mPrevNumber = 0;
+# endif
+ // Can't call profiler_* here since this may be non-xul-library
+ }
+# ifdef DEBUG
+ ~BaseProfilerCount() { mCanary = 0; }
+# endif
+
+ void Sample(int64_t& aCounter, uint64_t& aNumber) {
+ MOZ_ASSERT(mCanary == COUNTER_CANARY);
+
+ aCounter = *mCounter;
+ aNumber = mNumber ? *mNumber : 0;
+# ifdef DEBUG
+ MOZ_ASSERT(aNumber >= mPrevNumber);
+ mPrevNumber = aNumber;
+# endif
+ }
+
+ // We don't define ++ and Add() here, since the static defines directly
+ // increment the atomic counters, and the subclasses implement ++ and
+ // Add() directly.
+
+ // These typically are static strings (for example if you use the macros
+ // below)
+ const char* mLabel;
+ const char* mCategory;
+ const char* mDescription;
+ // We're ok with these being un-ordered in race conditions. These are
+ // pointers because we want to be able to use statics and increment them
+ // directly. Otherwise we could just have them inline, and not need the
+ // constructor args.
+ // These can be static globals (using the macros below), though they
+ // don't have to be - their lifetime must be longer than the use of them
+ // by the profiler (see profiler_add/remove_sampled_counter()). If you're
+ // using a lot of these, they probably should be allocated at runtime (see
+ // class ProfilerCounter below).
+ ProfilerAtomicSigned* mCounter;
+ ProfilerAtomicUnsigned* mNumber; // may be null
+
+# ifdef DEBUG
+ uint32_t mCanary;
+ uint64_t mPrevNumber; // value of number from the last Sample()
+# endif
+};
+
+// Designed to be allocated dynamically, and simply incremented with obj++
+// or obj->Add(n)
+class ProfilerCounter final : public BaseProfilerCount {
+ public:
+  // Hands the base class a pointer to our own `mCounter` and a null
+  // number-of-calls pointer, then adds this object to the sampled counters.
+  ProfilerCounter(const char* aLabel, const char* aCategory,
+                  const char* aDescription)
+      : BaseProfilerCount(aLabel, &mCounter, nullptr, aCategory, aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+
+  // Removes this object from the sampled counters.
+  virtual ~ProfilerCounter() { profiler_remove_sampled_counter(this); }
+
+  // Adjusts the running count by `aNumber`.
+  void Add(int64_t aNumber) { mCounter += aNumber; }
+
+  // Pre-increment: equivalent to Add(1).
+  BaseProfilerCount& operator++() {
+    mCounter += 1;
+    return *this;
+  }
+
+  // The running count; the base class holds a pointer to it.
+  ProfilerAtomicSigned mCounter;
+};
+
+// Also keeps a heatmap (number of calls to ++/Add())
+class ProfilerCounterTotal final : public BaseProfilerCount {
+ public:
+  // Hands the base class pointers to our own `mCounter` and `mNumber`, then
+  // adds this object to the sampled counters.
+  ProfilerCounterTotal(const char* aLabel, const char* aCategory,
+                       const char* aDescription)
+      : BaseProfilerCount(aLabel, &mCounter, &mNumber, aCategory,
+                          aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+
+  // Removes this object from the sampled counters.
+  virtual ~ProfilerCounterTotal() { profiler_remove_sampled_counter(this); }
+
+  // Adjusts the running count by `aNumber` and records one more call.
+  void Add(int64_t aNumber) {
+    mCounter += aNumber;
+    mNumber++;
+  }
+
+  // Pre-increment: equivalent to Add(1).
+  BaseProfilerCount& operator++() {
+    mCounter += 1;
+    mNumber++;
+    return *this;
+  }
+
+  // The running count and the number of modifications; the base class holds
+  // pointers to both.
+  ProfilerAtomicSigned mCounter;
+  ProfilerAtomicUnsigned mNumber;
+};
+
+// Defines a counter that is sampled on each profiler tick, with a running
+// count (signed), and number-of-instances. Note that because these are two
+// independent Atomics, there is a possibility that count will not include
+// the last call, but number of uses will. I think this is not worth
+// worrying about.
+// The BaseProfilerCount object itself is created on first use by
+// AUTO_BASE_PROFILER_COUNT_TOTAL. All type names are fully qualified because
+// the macro expands at the use site.
+#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description)      \
+    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0);  \
+    ::mozilla::baseprofiler::ProfilerAtomicUnsigned                           \
+        profiler_number_##label(0);                                           \
+    const char profiler_category_##label[] = category;                        \
+    const char profiler_description_##label[] = description;                  \
+    ::mozilla::UniquePtr<::mozilla::baseprofiler::BaseProfilerCount>          \
+        AutoCount_##label;
+
+// This counts, but doesn't keep track of the number of calls to
+// AUTO_BASE_PROFILER_COUNT().
+// The BaseProfilerCount object itself is created on first use by
+// AUTO_BASE_PROFILER_COUNT. All type names are fully qualified because the
+// macro expands at the use site.
+#  define BASE_PROFILER_DEFINE_COUNT(label, category, description)            \
+    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0);  \
+    const char profiler_category_##label[] = category;                        \
+    const char profiler_description_##label[] = description;                  \
+    ::mozilla::UniquePtr<::mozilla::baseprofiler::BaseProfilerCount>          \
+        AutoCount_##label;
+
+// This will create a static initializer if used, but avoids a possible
+// allocation: the BaseProfilerCount object is defined directly instead of
+// being held in a UniquePtr. All type names are fully qualified because the
+// macro expands at the use site.
+#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category,            \
+                                                  description)                \
+    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0);  \
+    ::mozilla::baseprofiler::ProfilerAtomicUnsigned                           \
+        profiler_number_##label(0);                                           \
+    ::mozilla::baseprofiler::BaseProfilerCount AutoCount_##label(             \
+        #label, &profiler_count_##label, &profiler_number_##label, category,  \
+        description);
+
+// If we didn't care about static initializers, we could avoid the need for
+// a ptr to the BaseProfilerCount object.
+
+// XXX It would be better to do this without the if() and without the
+// theoretical race to set the UniquePtr (i.e. possible leak).
+//
+// Bumps the number-of-calls atomic, adds `count` to the running count, and on
+// first use allocates the BaseProfilerCount for `label` and adds it to the
+// sampled counters. Requires BASE_PROFILER_DEFINE_COUNT_TOTAL(label, ...).
+#  define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count)                        \
+    do {                                                                      \
+      profiler_number_##label++; /* do this first*/                           \
+      profiler_count_##label += count;                                        \
+      if (!AutoCount_##label) {                                               \
+        /* Ignore that we could call this twice in theory, and that we leak   \
+         * them                                                               \
+         */                                                                   \
+        AutoCount_##label.reset(                                              \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                   \
+                #label, &profiler_count_##label, &profiler_number_##label,    \
+                profiler_category_##label, profiler_description_##label));    \
+        ::mozilla::baseprofiler::profiler_add_sampled_counter(                \
+            AutoCount_##label.get());                                         \
+      }                                                                       \
+    } while (0)
+
+// Adds `count` to the running count and, on first use, allocates the
+// BaseProfilerCount for `label` and adds it to the sampled counters.
+// Requires BASE_PROFILER_DEFINE_COUNT(label, ...), which declares only
+// `profiler_count_##label`: the counter pointer is therefore passed as
+// `aCounter` and `aNumber` is null (BaseProfilerCount::Sample() dereferences
+// the counter pointer unconditionally, and tolerates a null number pointer).
+#  define AUTO_BASE_PROFILER_COUNT(label, count)                              \
+    do {                                                                      \
+      profiler_count_##label += count; /* do this first*/                     \
+      if (!AutoCount_##label) {                                               \
+        /* Ignore that we could call this twice in theory, and that we leak   \
+         * them                                                               \
+         */                                                                   \
+        AutoCount_##label.reset(                                              \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                   \
+                #label, &profiler_count_##label, nullptr,                     \
+                profiler_category_##label, profiler_description_##label));    \
+        ::mozilla::baseprofiler::profiler_add_sampled_counter(                \
+            AutoCount_##label.get());                                         \
+      }                                                                       \
+    } while (0)
+
+# define AUTO_BASE_PROFILER_STATIC_COUNT(label, count) \
+ do { \
+ profiler_number_##label++; /* do this first*/ \
+ profiler_count_##label += count; \
+ } while (0)
+
+// if we need to force the allocation
+# define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label) \
+ do { \
+ if (!AutoCount_##label) { \
+ /* Ignore that we could call this twice in theory, and that we leak \
+ * them \
+ */ \
+ AutoCount_##label.reset( \
+ new ::mozilla::baseprofiler::BaseProfilerCount( \
+ #label, &profiler_count_##label, &profiler_number_##label, \
+ profiler_category_##label, profiler_description_##label)); \
+ } \
+ } while (0)
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // !MOZ_GECKO_PROFILER
+
+#endif // BaseProfilerCounts_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerDetail.h b/mozglue/baseprofiler/public/BaseProfilerDetail.h
new file mode 100644
index 0000000000..6ecf6e117b
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerDetail.h
@@ -0,0 +1,285 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Internal Base Profiler utilities.
+
+#ifndef BaseProfilerDetail_h
+#define BaseProfilerDetail_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/PlatformMutex.h"
+#include "mozilla/PlatformRWLock.h"
+#include "mozilla/BaseProfilerUtils.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+namespace detail {
+
+// Thin wrapper over mozglue's PlatformMutex implementation (MutexImpl), for
+// Base Profiler internal use. On top of plain locking, it records the id of
+// the thread that currently holds the lock, so that MOZ_ASSERTs can catch
+// recursive locking and unlocking from a thread that does not own the mutex.
+class MOZ_CAPABILITY("mutex") BaseProfilerMutex
+    : private ::mozilla::detail::MutexImpl {
+ public:
+  BaseProfilerMutex() : ::mozilla::detail::MutexImpl() {}
+  // `aName` is stored as-is (the pointer is not copied into owned storage).
+  explicit BaseProfilerMutex(const char* aName)
+      : ::mozilla::detail::MutexImpl(), mName(aName) {}
+
+  // Neither copyable nor movable.
+  BaseProfilerMutex(const BaseProfilerMutex&) = delete;
+  BaseProfilerMutex(BaseProfilerMutex&&) = delete;
+  BaseProfilerMutex& operator=(const BaseProfilerMutex&) = delete;
+  BaseProfilerMutex& operator=(BaseProfilerMutex&&) = delete;
+
+#ifdef DEBUG
+  ~BaseProfilerMutex() {
+    MOZ_ASSERT(!OwningThread().IsSpecified(),
+               "BaseProfilerMutex should have been unlocked when destroyed");
+  }
+#endif  // DEBUG
+
+  // True if the recorded owner is the calling thread.
+  [[nodiscard]] bool IsLockedOnCurrentThread() const {
+    const BaseProfilerThreadId owner = OwningThread();
+    return owner == baseprofiler::profiler_current_thread_id();
+  }
+
+  void AssertCurrentThreadOwns() const MOZ_ASSERT_CAPABILITY(this) {
+    MOZ_ASSERT(IsLockedOnCurrentThread());
+  }
+
+  // Blocks until the underlying mutex is acquired, then records the calling
+  // thread as the owner.
+  void Lock() MOZ_CAPABILITY_ACQUIRE() {
+    const BaseProfilerThreadId currentThread =
+        baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(currentThread.IsSpecified());
+    MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking");
+    ::mozilla::detail::MutexImpl::lock();
+    MOZ_ASSERT(!OwningThread().IsSpecified(), "Not unlocked properly");
+    mOwningThreadId = currentThread.ToNumber();
+  }
+
+  // Non-blocking variant of Lock(): returns true and records ownership if the
+  // underlying mutex could be acquired, false otherwise.
+  [[nodiscard]] bool TryLock() MOZ_TRY_ACQUIRE(true) {
+    const BaseProfilerThreadId currentThread =
+        baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(currentThread.IsSpecified());
+    MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking");
+    if (::mozilla::detail::MutexImpl::tryLock()) {
+      MOZ_ASSERT(!OwningThread().IsSpecified(), "Not unlocked properly");
+      mOwningThreadId = currentThread.ToNumber();
+      return true;
+    }
+    // Failed to lock, nothing more to do.
+    return false;
+  }
+
+  void Unlock() MOZ_CAPABILITY_RELEASE() {
+    MOZ_ASSERT(IsLockedOnCurrentThread(), "Unlocking when not locked here");
+    // The owner id is cleared first, while the mutex is still held, so the
+    // reset cannot race with another thread recording its ownership.
+    mOwningThreadId = BaseProfilerThreadId{}.ToNumber();
+    ::mozilla::detail::MutexImpl::unlock();
+  }
+
+  const char* GetName() const { return mName; }
+
+ private:
+  // Recorded owner, converted back from its stored numeric form.
+  BaseProfilerThreadId OwningThread() const {
+    return BaseProfilerThreadId::FromNumber(mOwningThreadId);
+  }
+
+  // Thread currently owning the lock, or 0.
+  // Atomic because it may be read at any time independent of the mutex.
+  // Relaxed because threads only need to know if they own it already, so:
+  // - If it's their id, only *they* wrote that value with a locked mutex.
+  // - If it's different from their thread id it doesn't matter what other
+  //   number it is (0 or another id) and that it can change again at any time.
+  Atomic<typename BaseProfilerThreadId::NumberType, MemoryOrdering::Relaxed>
+      mOwningThreadId;
+
+  const char* mName = nullptr;
+};
+
+// Scoped lock: calls Lock() on the given BaseProfilerMutex at construction
+// and Unlock() at destruction.
+class MOZ_RAII BaseProfilerAutoLock {
+ public:
+  explicit BaseProfilerAutoLock(BaseProfilerMutex& aMutex) : mMutex(aMutex) {
+    mMutex.Lock();
+  }
+
+  ~BaseProfilerAutoLock() { mMutex.Unlock(); }
+
+  // Neither copyable nor movable.
+  BaseProfilerAutoLock(const BaseProfilerAutoLock&) = delete;
+  BaseProfilerAutoLock(BaseProfilerAutoLock&&) = delete;
+  BaseProfilerAutoLock& operator=(const BaseProfilerAutoLock&) = delete;
+  BaseProfilerAutoLock& operator=(BaseProfilerAutoLock&&) = delete;
+
+ private:
+  // Mutex held for the lifetime of this object.
+  BaseProfilerMutex& mMutex;
+};
+
+// Optional mutex for Base Profiler internal use: wraps a BaseProfilerMutex
+// that only exists if activation was requested at construction time.
+// When not activated, Lock() and Unlock() do nothing and the object is never
+// reported as locked.
+// The mutex is held purely by composition; this class does not derive from
+// MutexImpl, so no platform mutex is created when it is not activated.
+class BaseProfilerMaybeMutex {
+ public:
+  // aActivate: whether to create the underlying BaseProfilerMutex.
+  explicit BaseProfilerMaybeMutex(bool aActivate) {
+    if (aActivate) {
+      mMaybeMutex.emplace();
+    }
+  }
+
+  BaseProfilerMaybeMutex(const BaseProfilerMaybeMutex&) = delete;
+  BaseProfilerMaybeMutex& operator=(const BaseProfilerMaybeMutex&) = delete;
+  BaseProfilerMaybeMutex(BaseProfilerMaybeMutex&&) = delete;
+  BaseProfilerMaybeMutex& operator=(BaseProfilerMaybeMutex&&) = delete;
+
+  ~BaseProfilerMaybeMutex() = default;
+
+  // True if the underlying mutex was created at construction time.
+  bool IsActivated() const { return mMaybeMutex.isSome(); }
+
+  [[nodiscard]] bool IsActivatedAndLockedOnCurrentThread() const {
+    if (!IsActivated()) {
+      // Not activated, so we can never be locked.
+      return false;
+    }
+    return mMaybeMutex->IsLockedOnCurrentThread();
+  }
+
+  // Asserts ownership only when activated; otherwise there is nothing to own.
+  void AssertCurrentThreadOwns() const {
+#ifdef DEBUG
+    if (IsActivated()) {
+      mMaybeMutex->AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+  }
+
+  // Locking is conditional on activation, so these two functions are
+  // excluded from the thread-safety analysis.
+  MOZ_PUSH_IGNORE_THREAD_SAFETY
+  void Lock() {
+    if (IsActivated()) {
+      mMaybeMutex->Lock();
+    }
+  }
+
+  void Unlock() {
+    if (IsActivated()) {
+      mMaybeMutex->Unlock();
+    }
+  }
+  MOZ_POP_THREAD_SAFETY
+
+ private:
+  // Present only if activated at construction.
+  Maybe<BaseProfilerMutex> mMaybeMutex;
+};
+
+// Scoped lock over a BaseProfilerMaybeMutex: calls Lock() at construction and
+// Unlock() at destruction. Both calls do nothing if the mutex was not
+// activated.
+class MOZ_RAII BaseProfilerMaybeAutoLock {
+ public:
+  explicit BaseProfilerMaybeAutoLock(BaseProfilerMaybeMutex& aMaybeMutex)
+      : mMaybeMutex(aMaybeMutex) {
+    mMaybeMutex.Lock();
+  }
+
+  ~BaseProfilerMaybeAutoLock() { mMaybeMutex.Unlock(); }
+
+  // Neither copyable nor movable.
+  BaseProfilerMaybeAutoLock(const BaseProfilerMaybeAutoLock&) = delete;
+  BaseProfilerMaybeAutoLock(BaseProfilerMaybeAutoLock&&) = delete;
+  BaseProfilerMaybeAutoLock& operator=(const BaseProfilerMaybeAutoLock&) =
+      delete;
+  BaseProfilerMaybeAutoLock& operator=(BaseProfilerMaybeAutoLock&&) = delete;
+
+ private:
+  // Possibly-inactive mutex held for the lifetime of this object.
+  BaseProfilerMaybeMutex& mMaybeMutex;
+};
+
+// Reader/writer lock for Base Profiler internal use, built on mozglue's
+// RWLockImpl. The exclusive (write) side records the owning thread so that
+// recursive exclusive locking and mismatched unlocking can be asserted; the
+// shared (read) side forwards directly to the underlying lock.
+class BaseProfilerSharedMutex : public ::mozilla::detail::RWLockImpl {
+ public:
+#ifdef DEBUG
+  ~BaseProfilerSharedMutex() {
+    MOZ_ASSERT(
+        !BaseProfilerThreadId::FromNumber(mOwningThreadId).IsSpecified(),
+        "BaseProfilerSharedMutex should have been unlocked when destroyed");
+  }
+#endif  // DEBUG
+
+  // True if the calling thread is recorded as the exclusive owner.
+  [[nodiscard]] bool IsLockedExclusiveOnCurrentThread() const {
+    return BaseProfilerThreadId::FromNumber(mOwningThreadId) ==
+           baseprofiler::profiler_current_thread_id();
+  }
+
+  // Takes the write lock, then records the calling thread as the owner.
+  void LockExclusive() {
+    const BaseProfilerThreadId tid = baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(tid.IsSpecified());
+    MOZ_ASSERT(!IsLockedExclusiveOnCurrentThread(), "Recursive locking");
+    ::mozilla::detail::RWLockImpl::writeLock();
+    MOZ_ASSERT(
+        !BaseProfilerThreadId::FromNumber(mOwningThreadId).IsSpecified(),
+        "Not unlocked properly");
+    mOwningThreadId = tid.ToNumber();
+  }
+
+  void UnlockExclusive() {
+    MOZ_ASSERT(IsLockedExclusiveOnCurrentThread(),
+               "Unlocking when not locked here");
+    // We're still holding the mutex here, so it's safe to just reset
+    // `mOwningThreadId`.
+    mOwningThreadId = BaseProfilerThreadId{}.ToNumber();
+    ::mozilla::detail::RWLockImpl::writeUnlock();
+  }
+
+  // Shared locking does not track ownership.
+  void LockShared() { ::mozilla::detail::RWLockImpl::readLock(); }
+
+  void UnlockShared() { ::mozilla::detail::RWLockImpl::readUnlock(); }
+
+ private:
+  // Thread currently owning the exclusive lock, or 0.
+  // Atomic because it may be read at any time independent of the mutex.
+  // Relaxed because threads only need to know if they own it already, so:
+  // - If it's their id, only *they* wrote that value with a locked mutex.
+  // - If it's different from their thread id it doesn't matter what other
+  //   number it is (0 or another id) and that it can change again at any time.
+  Atomic<typename BaseProfilerThreadId::NumberType, MemoryOrdering::Relaxed>
+      mOwningThreadId;
+};
+
+// Scoped exclusive lock: calls LockExclusive() on the given
+// BaseProfilerSharedMutex at construction and UnlockExclusive() at
+// destruction.
+class MOZ_RAII BaseProfilerAutoLockExclusive {
+ public:
+  explicit BaseProfilerAutoLockExclusive(BaseProfilerSharedMutex& aSharedMutex)
+      : mSharedMutex(aSharedMutex) {
+    mSharedMutex.LockExclusive();
+  }
+
+  ~BaseProfilerAutoLockExclusive() { mSharedMutex.UnlockExclusive(); }
+
+  // Neither copyable nor movable.
+  BaseProfilerAutoLockExclusive(const BaseProfilerAutoLockExclusive&) = delete;
+  BaseProfilerAutoLockExclusive(BaseProfilerAutoLockExclusive&&) = delete;
+  BaseProfilerAutoLockExclusive& operator=(
+      const BaseProfilerAutoLockExclusive&) = delete;
+  BaseProfilerAutoLockExclusive& operator=(BaseProfilerAutoLockExclusive&&) =
+      delete;
+
+ private:
+  // Shared mutex held exclusively for the lifetime of this object.
+  BaseProfilerSharedMutex& mSharedMutex;
+};
+
+// Scoped shared (non-exclusive) lock: calls LockShared() on the given
+// BaseProfilerSharedMutex at construction and UnlockShared() at destruction.
+// Other threads may hold their own BaseProfilerAutoLockShared on the same
+// mutex at the same time.
+class MOZ_RAII BaseProfilerAutoLockShared {
+ public:
+  explicit BaseProfilerAutoLockShared(BaseProfilerSharedMutex& aSharedMutex)
+      : mSharedMutex(aSharedMutex) {
+    mSharedMutex.LockShared();
+  }
+
+  ~BaseProfilerAutoLockShared() { mSharedMutex.UnlockShared(); }
+
+  // Neither copyable nor movable.
+  BaseProfilerAutoLockShared(const BaseProfilerAutoLockShared&) = delete;
+  BaseProfilerAutoLockShared(BaseProfilerAutoLockShared&&) = delete;
+  BaseProfilerAutoLockShared& operator=(const BaseProfilerAutoLockShared&) =
+      delete;
+  BaseProfilerAutoLockShared& operator=(BaseProfilerAutoLockShared&&) = delete;
+
+ private:
+  // Shared mutex held non-exclusively for the lifetime of this object.
+  BaseProfilerSharedMutex& mSharedMutex;
+};
+
+} // namespace detail
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // BaseProfilerDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerLabels.h b/mozglue/baseprofiler/public/BaseProfilerLabels.h
new file mode 100644
index 0000000000..8da596e3ab
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerLabels.h
@@ -0,0 +1,178 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains all definitions related to Base Profiler labels (outside
+// of XUL).
+// It is safe to include unconditionally, and only defines empty macros if
+// MOZ_GECKO_PROFILER is not set.
+
+#ifndef BaseProfilerLabels_h
+#define BaseProfilerLabels_h
+
+#ifndef MOZ_GECKO_PROFILER
+
+// MOZ_GECKO_PROFILER is not defined: every label macro below expands to
+// nothing, so call sites still compile but do not push any label.
+# define AUTO_BASE_PROFILER_LABEL(label, categoryPair)
+# define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair)
+# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr)
+# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str)
+# define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx)
+# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, \
+ categoryPair, ctx, flags)
+
+#else // !MOZ_GECKO_PROFILER
+
+# include "BaseProfilingStack.h"
+
+# include "mozilla/Attributes.h"
+# include "mozilla/Maybe.h"
+# include "mozilla/BaseProfilerRAIIMacro.h"
+# include "mozilla/BaseProfilerState.h"
+# include "mozilla/ThreadLocal.h"
+
+# include <stdint.h>
+# include <string>
+
+namespace mozilla::baseprofiler {
+
+// Insert an RAII object in this scope to enter a label stack frame. Any
+// samples collected in this scope will contain this label in their stack.
+// The label argument must be a static C string. It is usually of the
+// form "ClassName::FunctionName". (Ideally we'd use the compiler to provide
+// that for us, but __func__ gives us the function name without the class
+// name.) If the label applies to only part of a function, you can qualify it
+// like this: "ClassName::FunctionName:PartName".
+//
+// Use AUTO_BASE_PROFILER_LABEL_DYNAMIC_* if you want to add additional /
+// dynamic information to the label stack frame.
+// Expands to a single AutoProfilerLabel declaration (without a trailing `;`)
+// that passes nullptr as the dynamic string. `categoryPair` is the bare
+// enumerator name; the macro prefixes it with ProfilingCategoryPair::.
+# define AUTO_BASE_PROFILER_LABEL(label, categoryPair) \
+ ::mozilla::baseprofiler::AutoProfilerLabel PROFILER_RAII( \
+ label, nullptr, \
+ ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but with only one argument: the category
+// pair. The label string is taken from the category pair. This is convenient
+// for labels like
+// AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(GRAPHICS_LayerBuilding) which would
+// otherwise just repeat the string.
+// Passes an empty label string and a null dynamic string, and sets the
+// LABEL_DETERMINED_BY_CATEGORY_PAIR flag on the frame.
+# define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair) \
+ ::mozilla::baseprofiler::AutoProfilerLabel PROFILER_RAII( \
+ "", nullptr, \
+ ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair, \
+ uint32_t(::mozilla::baseprofiler::ProfilingStackFrame::Flags:: \
+ LABEL_DETERMINED_BY_CATEGORY_PAIR))
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but with an additional string. The
+// inserted RAII object stores the cStr pointer in a field; it does not copy the
+// string.
+//
+// WARNING: This means that the string you pass to this macro needs to live at
+// least until the end of the current scope. Be careful using this macro with
+// ns[C]String; the other AUTO_BASE_PROFILER_LABEL_DYNAMIC_* macros below are
+// preferred because they avoid this problem.
+//
+// If the profiler samples the current thread and walks the label stack while
+// this RAII object is on the stack, it will copy the supplied string into the
+// profile buffer. So there's one string copy operation, and it happens at
+// sample time.
+//
+// Compare this to the plain AUTO_BASE_PROFILER_LABEL macro, which only accepts
+// literal strings: When the label stack frames generated by
+// AUTO_BASE_PROFILER_LABEL are sampled, no string copy needs to be made because
+// the profile buffer can just store the raw pointers to the literal strings.
+// Consequently, AUTO_BASE_PROFILER_LABEL frames take up considerably less space
+// in the profile buffer than AUTO_BASE_PROFILER_LABEL_DYNAMIC_* frames.
+// `cStr` is forwarded unchanged as the AutoProfilerLabel's dynamic-string
+// argument; no copy is made by this macro.
+# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) \
+ ::mozilla::baseprofiler::AutoProfilerLabel PROFILER_RAII( \
+ label, cStr, \
+ ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR, but takes an std::string.
+//
+// Note: The use of the Maybe<>s ensures the scopes for the dynamic string and
+// the AutoProfilerLabel are appropriate, while also not incurring the runtime
+// cost of the string assignment unless the profiler is active. Therefore,
+// unlike AUTO_BASE_PROFILER_LABEL and AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR,
+// this macro doesn't push/pop a label when the profiler is inactive.
+//
+// The expansion consists of two declarations with the fixed names `autoStr`
+// and `raiiObjectString`, followed by an `if` block. It can therefore be used
+// at most once per scope, and must not be the sole body of an unbraced
+// `if`/`for`/`while`.
+// `Maybe` is spelled `::mozilla::Maybe` so that the macro also works when it
+// is expanded outside of namespace mozilla.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str) \
+    ::mozilla::Maybe<std::string> autoStr; \
+    ::mozilla::Maybe<::mozilla::baseprofiler::AutoProfilerLabel> \
+        raiiObjectString; \
+    if (::mozilla::baseprofiler::profiler_is_active()) { \
+      autoStr.emplace(str); \
+      raiiObjectString.emplace( \
+          label, autoStr->c_str(), \
+          ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair); \
+    }
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but accepting a JSContext* parameter,
+// and a no-op if the profiler is disabled. Used to annotate functions for which
+// overhead in the range of nanoseconds is noticeable. It avoids overhead from
+// the TLS lookup because it can get the ProfilingStack from the JS context, and
+// avoids almost all overhead in the case where the profiler is disabled.
+// NOTE(review): this passes `ctx` as the first constructor argument, but the
+// AutoProfilerLabel class declared in this header only has a constructor
+// starting with (const char* aLabel, const char* aDynamicString, ...). As
+// written the expansion does not appear to match any visible constructor --
+// confirm whether this macro is usable in the Base Profiler.
+# define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx) \
+ ::mozilla::baseprofiler::AutoProfilerLabel PROFILER_RAII( \
+ ctx, label, nullptr, \
+ ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL_FAST, but also takes an extra string and
+// an additional set of flags. The flags parameter should carry values from the
+// ProfilingStackFrame::Flags enum.
+// NOTE(review): this passes `ctx` as the first constructor argument, but the
+// AutoProfilerLabel class declared in this header has no constructor taking a
+// leading context argument -- confirm whether this macro is usable in the
+// Base Profiler.
+# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, \
+ categoryPair, ctx, flags) \
+ ::mozilla::baseprofiler::AutoProfilerLabel PROFILER_RAII( \
+ ctx, label, dynamicString, \
+ ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair, flags)
+
+// Holds a non-owning pointer to a ProfilingStack: pushes a label frame onto
+// it when constructed and pops that frame when destroyed. Objects of this
+// class are stack-allocated, and so exist within a thread, and are thus
+// bounded by the lifetime of the thread, which ensures that the pointer held
+// can't be used after the ProfilingStack is destroyed.
+class MOZ_RAII AutoProfilerLabel {
+ public:
+  // Constructor used by AUTO_BASE_PROFILER_LABEL and the
+  // AUTO_BASE_PROFILER_LABEL_DYNAMIC_* macros. The ProfilingStack is obtained
+  // through GetProfilingStack() (a TLS lookup).
+  AutoProfilerLabel(const char* aLabel, const char* aDynamicString,
+                    ProfilingCategoryPair aCategoryPair, uint32_t aFlags = 0) {
+    ProfilingStack* const stack = GetProfilingStack();
+    Push(stack, aLabel, aDynamicString, aCategoryPair, aFlags);
+  }
+
+  // Remembers `aProfilingStack` and, if it is non-null, pushes a label frame
+  // onto it. May run on any thread, main or not.
+  void Push(ProfilingStack* aProfilingStack, const char* aLabel,
+            const char* aDynamicString, ProfilingCategoryPair aCategoryPair,
+            uint32_t aFlags = 0) {
+    mProfilingStack = aProfilingStack;
+    if (!mProfilingStack) {
+      return;
+    }
+    mProfilingStack->pushLabelFrame(aLabel, aDynamicString, this,
+                                    aCategoryPair, aFlags);
+  }
+
+  // Pops the frame pushed by Push(), if there was a stack to push onto.
+  // May run on any thread, main or not.
+  ~AutoProfilerLabel() {
+    if (!mProfilingStack) {
+      return;
+    }
+    mProfilingStack->pop();
+  }
+
+  MFBT_API static ProfilingStack* GetProfilingStack();
+
+ private:
+  // Saved at push time so the destructor does not have to redo the TLS
+  // lookup.
+  ProfilingStack* mProfilingStack;
+
+ public:
+  // See the comment on the definition in platform.cpp for details about this.
+  static MOZ_THREAD_LOCAL(ProfilingStack*) sProfilingStack;
+};
+
+} // namespace mozilla::baseprofiler
+
+#endif // !MOZ_GECKO_PROFILER
+
+#endif // BaseProfilerLabels_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h
new file mode 100644
index 0000000000..404e15c5f6
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h
@@ -0,0 +1,125 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerMarkerTypes_h
+#define BaseProfilerMarkerTypes_h
+
+// This header contains common marker type definitions.
+//
+// It #include's "mozilla/BaseProfilerMarkers.h", see that file for how to
+// define other marker types, and how to add markers to the profiler buffers.
+//
+// If you don't need to use these common types, #include
+// "mozilla/BaseProfilerMarkers.h" instead.
+//
+// Types in this file can be defined without relying on xpcom.
+// Others are defined in "ProfilerMarkerTypes.h".
+
+// !!! /!\ WORK IN PROGRESS /!\ !!!
+// This file contains draft marker definitions, but most are not used yet.
+// Further work is needed to complete these definitions, and use them to convert
+// existing PROFILER_ADD_MARKER calls. See meta bug 1661394.
+
+#include "mozilla/BaseProfilerMarkers.h"
+
+namespace mozilla::baseprofiler::markers {
+
+// Marker type "MediaSample": carries a sample's start time and end time (both
+// in microseconds) and a queue length.
+struct MediaSampleMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("MediaSample");
+  }
+
+  // Streams the payload as three integer JSON properties.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   int64_t aSampleStartTimeUs,
+                                   int64_t aSampleEndTimeUs,
+                                   int64_t aQueueLength) {
+    aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs);
+    aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs);
+    aWriter.IntProperty("queueLength", aQueueLength);
+  }
+
+  // Display schema: marker chart and marker table, one labelled entry per
+  // property written above.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::MarkerChart,
+                               MarkerSchema::Location::MarkerTable};
+    displaySchema.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time",
+                                    MarkerSchema::Format::Microseconds);
+    displaySchema.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time",
+                                    MarkerSchema::Format::Microseconds);
+    displaySchema.AddKeyLabelFormat("queueLength", "Queue length",
+                                    MarkerSchema::Format::Integer);
+    return displaySchema;
+  }
+};
+
+// Marker type "VideoFallingBehind": carries a video frame start time and the
+// media current time, both in microseconds.
+struct VideoFallingBehindMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("VideoFallingBehind");
+  }
+
+  // Streams the payload as two integer JSON properties.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   int64_t aVideoFrameStartTimeUs,
+                                   int64_t aMediaCurrentTimeUs) {
+    aWriter.IntProperty("videoFrameStartTimeUs", aVideoFrameStartTimeUs);
+    aWriter.IntProperty("mediaCurrentTimeUs", aMediaCurrentTimeUs);
+  }
+
+  // Display schema: marker chart and marker table, one labelled entry per
+  // property written above.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::MarkerChart,
+                               MarkerSchema::Location::MarkerTable};
+    displaySchema.AddKeyLabelFormat("videoFrameStartTimeUs",
+                                    "Video frame start time",
+                                    MarkerSchema::Format::Microseconds);
+    displaySchema.AddKeyLabelFormat("mediaCurrentTimeUs", "Media current time",
+                                    MarkerSchema::Format::Microseconds);
+    return displaySchema;
+  }
+};
+
+// Payload-less marker type whose type name is "CONTENT_FULL_PAINT_TIME".
+struct ContentBuildMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("CONTENT_FULL_PAINT_TIME");
+  }
+
+  // There is no payload, so nothing is streamed.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter) {}
+
+  // Display schema: marker chart and marker table, without any data entry.
+  static MarkerSchema MarkerTypeDisplay() {
+    return MarkerSchema{MarkerSchema::Location::MarkerChart,
+                        MarkerSchema::Location::MarkerTable};
+  }
+};
+
+// Marker type "MediaEngine": carries a media engine id.
+struct MediaEngineMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("MediaEngine");
+  }
+
+  // Streams the id as the integer JSON property "id".
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   uint64_t aMediaEngineId) {
+    aWriter.IntProperty("id", aMediaEngineId);
+  }
+
+  // Display schema: marker chart and marker table, showing the id.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::MarkerChart,
+                               MarkerSchema::Location::MarkerTable};
+    displaySchema.AddKeyLabelFormat("id", "Id", MarkerSchema::Format::Integer);
+    return displaySchema;
+  }
+};
+
+// Marker type "MediaEngineText": carries a media engine id and a text.
+struct MediaEngineTextMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("MediaEngineText");
+  }
+
+  // Streams the id as the integer property "id", then the text as the string
+  // property "text".
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   uint64_t aMediaEngineId,
+                                   const ProfilerString8View& aText) {
+    aWriter.IntProperty("id", aMediaEngineId);
+    aWriter.StringProperty("text", aText);
+  }
+
+  // Display schema: marker chart and marker table, showing the id and the
+  // text (labelled "Details").
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::MarkerChart,
+                               MarkerSchema::Location::MarkerTable};
+    displaySchema.AddKeyLabelFormat("id", "Id", MarkerSchema::Format::Integer);
+    displaySchema.AddKeyLabelFormat("text", "Details",
+                                    MarkerSchema::Format::String);
+    return displaySchema;
+  }
+};
+
+} // namespace mozilla::baseprofiler::markers
+
+#endif // BaseProfilerMarkerTypes_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkers.h b/mozglue/baseprofiler/public/BaseProfilerMarkers.h
new file mode 100644
index 0000000000..d706cefd4a
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkers.h
@@ -0,0 +1,255 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Markers are useful to delimit something important happening such as the first
+// paint. Unlike labels, which are only recorded in the profile buffer if a
+// sample is collected while the label is on the label stack, markers will
+// always be recorded in the profile buffer.
+//
+// This header contains basic definitions necessary to create marker types, and
+// to add markers to the profiler buffers.
+//
+// If basic marker types are needed, #include
+// "mozilla/BaseProfilerMarkerTypes.h" instead.
+//
+// But if you want to create your own marker type locally, you can #include this
+// header only; look at mozilla/BaseProfilerMarkerTypes.h for examples of how to
+// define types, and mozilla/BaseProfilerMarkersPrerequisites.h for some
+// supporting types.
+//
+// To then record markers:
+// - Use `baseprofiler::AddMarker(...)` from mozglue or other libraries that
+// are outside of xul, especially if they may happen outside of xpcom's
+// lifetime (typically startup, shutdown, or tests).
+// - Otherwise #include "ProfilerMarkers.h" instead, and use
+// `profiler_add_marker(...)`.
+// See these functions for more details.
+
+#ifndef BaseProfilerMarkers_h
+#define BaseProfilerMarkers_h
+
+#include "mozilla/BaseProfilerMarkersDetail.h"
+#include "mozilla/BaseProfilerLabels.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Unused.h"
+
+#include <functional>
+#include <string>
+#include <utility>
+
+namespace mozilla::baseprofiler {
+
+#ifdef MOZ_GECKO_PROFILER
+// Forward-declaration. TODO: Move to more common header, see bug 1681416.
+// Declared here because AddMarkerToBuffer() below passes this function as its
+// stack-capture callback.
+MFBT_API bool profiler_capture_backtrace_into(
+ ProfileChunkedBuffer& aChunkedBuffer, StackCaptureOptions aCaptureOptions);
+
+// Adds a marker to the given buffer. In most cases `AddMarker()` and the
+// related macros should be used instead (see below for details about them and
+// about the parameters); this function is for markers that must be recorded
+// in a local buffer rather than in the main profiler buffer.
+template <typename MarkerType, typename... PayloadArguments>
+ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    MarkerType aMarkerType, const PayloadArguments&... aPayloadArguments) {
+  // The marker object itself carries no data; only its type matters.
+  Unused << aMarkerType;
+  AUTO_BASE_PROFILER_LABEL("baseprofiler::AddMarkerToBuffer", PROFILER);
+
+  // Do not capture a stack if the NoMarkerStacks feature is set.
+  const auto backtraceCaptureFunction =
+      profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)
+          ? ::mozilla::baseprofiler::profiler_capture_backtrace_into
+          : nullptr;
+
+  return base_profiler_markers_detail::AddMarkerToBuffer<MarkerType>(
+      aBuffer, aName, aCategory, std::move(aOptions), backtraceCaptureFunction,
+      aPayloadArguments...);
+}
+
+// Payload-less overload: adds a `markers::NoPayload` marker to the given
+// buffer by forwarding to the templated AddMarkerToBuffer().
+inline ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions = {}) {
+  const markers::NoPayload noPayload{};
+  return AddMarkerToBuffer(aBuffer, aName, aCategory, std::move(aOptions),
+                           noPayload);
+}
+#endif // MOZ_GECKO_PROFILER
+
+// Adds a marker to the Base Profiler buffer.
+// - aName: Main name of this marker.
+// - aCategory: Category for this marker.
+// - aOptions: Optional settings (such as timing, inner window id,
+//   backtrace...), see `MarkerOptions` for details.
+// - aMarkerType: Empty object that specifies the type of marker.
+// - aPayloadArguments: Arguments expected by this marker type's
+//   `StreamJSONMarkerData` function.
+// Returns a default-constructed ProfileBufferBlockIndex when
+// MOZ_GECKO_PROFILER is not defined or when the core buffer is not in
+// session.
+template <typename MarkerType, typename... PayloadArguments>
+ProfileBufferBlockIndex AddMarker(
+    const ProfilerString8View& aName, const MarkerCategory& aCategory,
+    MarkerOptions&& aOptions, MarkerType aMarkerType,
+    const PayloadArguments&... aPayloadArguments) {
+#ifdef MOZ_GECKO_PROFILER
+  // Record base markers whenever the core buffer is in session.
+  // TODO: When profiler_thread_is_being_profiled becomes available from
+  // mozglue, use it instead.
+  ProfileChunkedBuffer& coreBuffer =
+      ::mozilla::baseprofiler::profiler_get_core_buffer();
+  if (coreBuffer.IsInSession()) {
+    return ::mozilla::baseprofiler::AddMarkerToBuffer(
+        coreBuffer, aName, aCategory, std::move(aOptions), aMarkerType,
+        aPayloadArguments...);
+  }
+  return {};
+#else
+  return {};
+#endif
+}
+
+// Payload-less overload: adds a `markers::NoPayload` marker to the Base
+// Profiler buffer by forwarding to the templated AddMarker().
+inline ProfileBufferBlockIndex AddMarker(const ProfilerString8View& aName,
+                                         const MarkerCategory& aCategory,
+                                         MarkerOptions&& aOptions = {}) {
+  const markers::NoPayload noPayload{};
+  return AddMarker(aName, aCategory, std::move(aOptions), noPayload);
+}
+
+} // namespace mozilla::baseprofiler
+
+// Same as `AddMarker()` (without payload). This macro is safe to use even if
+// MOZ_GECKO_PROFILER is not #defined.
+// `categoryName` is the bare category identifier; the macro prefixes it with
+// ::mozilla::baseprofiler::category::. Any extra arguments (e.g. marker
+// options) are forwarded to AddMarker() through ##__VA_ARGS__.
+// NOTE(review): AUTO_PROFILER_STATS is not defined in this header; presumably
+// it comes from one of the included headers -- confirm.
+#define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...) \
+ do { \
+ AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_UNTYPED); \
+ ::mozilla::baseprofiler::AddMarker( \
+ markerName, ::mozilla::baseprofiler::category::categoryName, \
+ ##__VA_ARGS__); \
+ } while (false)
+
+// Same as `AddMarker()` (with payload). This macro is safe to use even if
+// MOZ_GECKO_PROFILER is not #defined.
+// `MarkerType` is the bare name of a type in
+// ::mozilla::baseprofiler::markers; the macro default-constructs an object of
+// that type and passes it to AddMarker(), followed by the payload arguments
+// given through ##__VA_ARGS__.
+#define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \
+ ...) \
+ do { \
+ AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_with_##MarkerType); \
+ ::mozilla::baseprofiler::AddMarker( \
+ markerName, ::mozilla::baseprofiler::category::categoryName, options, \
+ ::mozilla::baseprofiler::markers::MarkerType{}, ##__VA_ARGS__); \
+ } while (false)
+
+namespace mozilla::baseprofiler::markers {
+// Most common marker type: a marker carrying one free-form text. Other types
+// are in BaseProfilerMarkerTypes.h.
+struct TextMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("Text");
+  }
+
+  // Streams the text as the string JSON property "name".
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aText) {
+    aWriter.StringProperty("name", aText);
+  }
+
+  // Display schema: marker chart and marker table, with custom chart and
+  // table labels, and the text shown as a searchable "Details" entry.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::MarkerChart,
+                               MarkerSchema::Location::MarkerTable};
+    displaySchema.SetChartLabel("{marker.data.name}");
+    displaySchema.SetTableLabel("{marker.name} - {marker.data.name}");
+    displaySchema.AddKeyLabelFormatSearchable(
+        "name", "Details", MarkerSchema::Format::String,
+        MarkerSchema::Searchable::Searchable);
+    return displaySchema;
+  }
+};
+
+// Marker type "tracing", carrying an optional category string.
+// Keep this struct in sync with the `gecko_profiler::marker::Tracing` Rust
+// counterpart.
+struct Tracing {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("tracing");
+  }
+
+  // Streams the category as the string JSON property "category"; nothing is
+  // written when the category is empty.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aCategory) {
+    if (aCategory.Length() == 0) {
+      return;
+    }
+    aWriter.StringProperty("category", aCategory);
+  }
+
+  // Display schema: marker chart, marker table and timeline overview, with
+  // the category shown as a searchable "Type" entry.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::MarkerChart,
+                               MarkerSchema::Location::MarkerTable,
+                               MarkerSchema::Location::TimelineOverview};
+    displaySchema.AddKeyLabelFormatSearchable(
+        "category", "Type", MarkerSchema::Format::String,
+        MarkerSchema::Searchable::Searchable);
+    return displaySchema;
+  }
+};
+} // namespace mozilla::baseprofiler::markers
+
+// Add a text marker. This macro is safe to use even if MOZ_GECKO_PROFILER is
+// not #defined.
+// Calls AddMarker() with a default-constructed markers::TextMarker and `text`
+// as its single payload argument. `categoryName` is the bare category
+// identifier; the macro prefixes it with ::mozilla::baseprofiler::category::.
+#define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \
+ do { \
+ AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_TEXT); \
+ ::mozilla::baseprofiler::AddMarker( \
+ markerName, ::mozilla::baseprofiler::category::categoryName, options, \
+ ::mozilla::baseprofiler::markers::TextMarker{}, text); \
+ } while (false)
+
+namespace mozilla::baseprofiler {
+
+// RAII object that records a text marker when it goes out of scope. The
+// marker's timing covers the interval from construction (unless the provided
+// options already specify an instant or start time) until destruction.
+// Nothing is recorded if the profiler is not active and unpaused at
+// destruction time.
+class MOZ_RAII AutoProfilerTextMarker {
+ public:
+  // aMarkerName is stored as a raw pointer (the characters are not copied),
+  // so it must stay valid until this object is destroyed. aText is copied.
+  // aOptions must not already carry an end time.
+  AutoProfilerTextMarker(const char* aMarkerName,
+                         const MarkerCategory& aCategory,
+                         MarkerOptions&& aOptions, const std::string& aText)
+      : mMarkerName(aMarkerName),
+        mCategory(aCategory),
+        mOptions(std::move(aOptions)),
+        mText(aText) {
+    MOZ_ASSERT(mOptions.Timing().EndTime().IsNull(),
+               "AutoProfilerTextMarker options shouldn't have an end time");
+    // Only stamp a start time if the caller did not provide one, and only
+    // when the profiler could actually record the marker.
+    const bool needsStartTime = profiler_is_active_and_unpaused() &&
+                                mOptions.Timing().StartTime().IsNull();
+    if (needsStartTime) {
+      mOptions.Set(MarkerTiming::InstantNow());
+    }
+  }
+
+  ~AutoProfilerTextMarker() {
+    if (!profiler_is_active_and_unpaused()) {
+      return;
+    }
+    // Close the interval now, then hand everything over to AddMarker().
+    mOptions.TimingRef().SetIntervalEnd();
+    AUTO_PROFILER_STATS(AUTO_BASE_PROFILER_MARKER_TEXT);
+    AddMarker(ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+              mCategory, std::move(mOptions), markers::TextMarker{}, mText);
+  }
+
+ protected:
+  const char* mMarkerName;
+  MarkerCategory mCategory;
+  MarkerOptions mOptions;
+  std::string mText;
+};
+
+// Explicit instantiation declarations for commonly-used specializations:
+// translation units including this header do not instantiate them
+// implicitly. Presumably the matching explicit instantiations live in one of
+// the Base Profiler source files -- confirm.
+#ifdef MOZ_GECKO_PROFILER
+extern template MFBT_API ProfileBufferBlockIndex
+AddMarker(const ProfilerString8View&, const MarkerCategory&, MarkerOptions&&,
+ markers::TextMarker, const std::string&);
+
+extern template MFBT_API ProfileBufferBlockIndex
+AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&,
+ const MarkerCategory&, MarkerOptions&&, markers::NoPayload);
+
+extern template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer(
+ ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&,
+ MarkerOptions&&, markers::TextMarker, const std::string&);
+#endif // MOZ_GECKO_PROFILER
+
+} // namespace mozilla::baseprofiler
+
+// Creates an AutoProfilerTextMarker RAII object. This macro is safe to use
+// even if MOZ_GECKO_PROFILER is not #defined.
+// Expands to a single AutoProfilerTextMarker declaration (without a trailing
+// `;`); the marker is recorded when that object is destroyed.
+// NOTE(review): relies on PROFILER_RAII, which is not defined in this header
+// -- confirm it is available on every include path, including builds without
+// MOZ_GECKO_PROFILER.
+#define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, \
+ text) \
+ ::mozilla::baseprofiler::AutoProfilerTextMarker PROFILER_RAII( \
+ markerName, ::mozilla::baseprofiler::category::categoryName, options, \
+ text)
+
+#endif // BaseProfilerMarkers_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h
new file mode 100644
index 0000000000..1102499100
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h
@@ -0,0 +1,741 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerMarkersDetail_h
+#define BaseProfilerMarkersDetail_h
+
+#ifndef BaseProfilerMarkers_h
+# error "This header should only be #included by BaseProfilerMarkers.h"
+#endif
+
+#include "mozilla/BaseProfilerMarkersPrerequisites.h"
+
+// ~~ HERE BE DRAGONS ~~
+//
+// Everything below is internal implementation detail, you shouldn't need to
+// look at it unless working on the profiler code.
+
+#include "mozilla/BaseProfileJSONWriter.h"
+#include "mozilla/ProfileBufferEntryKinds.h"
+
+#include <limits>
+#include <tuple>
+#include <type_traits>
+
+namespace mozilla::baseprofiler {
+// Implemented in platform.cpp
+MFBT_API ProfileChunkedBuffer& profiler_get_core_buffer();
+} // namespace mozilla::baseprofiler
+
+namespace mozilla::base_profiler_markers_detail {
+
+struct Streaming {
+ // A `MarkerDataDeserializer` is a free function that can read a serialized
+ // payload from an `EntryReader` and stream it as JSON object properties.
+ using MarkerDataDeserializer = void (*)(ProfileBufferEntryReader&,
+ baseprofiler::SpliceableJSONWriter&);
+
+ // A `MarkerTypeNameFunction` is a free function that returns the name of the
+ // marker type.
+ using MarkerTypeNameFunction = Span<const char> (*)();
+
+ // A `MarkerSchemaFunction` is a free function that returns a
+ // `MarkerSchema`, which contains all the information needed to stream
+ // the display schema associated with a marker type.
+ using MarkerSchemaFunction = MarkerSchema (*)();
+
+ struct MarkerTypeFunctions {
+ MarkerDataDeserializer mMarkerDataDeserializer = nullptr;
+ MarkerTypeNameFunction mMarkerTypeNameFunction = nullptr;
+ MarkerSchemaFunction mMarkerSchemaFunction = nullptr;
+ };
+
+ // A `DeserializerTag` will be added before the payload, to help select the
+ // correct deserializer when reading back the payload.
+ using DeserializerTag = uint8_t;
+
+ // Store a deserializer (and other marker-type-specific functions) and get its
+ // `DeserializerTag`.
+ // This is intended to be only used once per deserializer when a new marker
+ // type is used for the first time, so it should be called to initialize a
+ // `static const` tag that will be re-used by all markers of the corresponding
+ // payload type -- see use below.
+ MFBT_API static DeserializerTag TagForMarkerTypeFunctions(
+ MarkerDataDeserializer aDeserializer,
+ MarkerTypeNameFunction aMarkerTypeNameFunction,
+ MarkerSchemaFunction aMarkerSchemaFunction);
+
+ // Get the `MarkerDataDeserializer` for a given `DeserializerTag`.
+ MFBT_API static MarkerDataDeserializer DeserializerForTag(
+ DeserializerTag aTag);
+
+ // Retrieve all `MarkerTypeFunctions` entries.
+ // While this object lives, no other operations can happen on this list.
+ class LockedMarkerTypeFunctionsList {
+ public:
+ MFBT_API LockedMarkerTypeFunctionsList();
+ MFBT_API ~LockedMarkerTypeFunctionsList();
+
+ LockedMarkerTypeFunctionsList(const LockedMarkerTypeFunctionsList&) =
+ delete;
+ LockedMarkerTypeFunctionsList& operator=(
+ const LockedMarkerTypeFunctionsList&) = delete;
+
+ auto begin() const { return mMarkerTypeFunctionsSpan.begin(); }
+ auto end() const { return mMarkerTypeFunctionsSpan.end(); }
+
+ private:
+ Span<const MarkerTypeFunctions> mMarkerTypeFunctionsSpan;
+ };
+};
+
+// This helper will examine a marker type's `StreamJSONMarkerData` function, see
+// specialization below.
+template <typename T>
+struct StreamFunctionTypeHelper;
+
+// Helper specialization that takes the expected
+// `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function and
+// provides information about the `...` parameters.
+template <typename R, typename... As>
+struct StreamFunctionTypeHelper<R(baseprofiler::SpliceableJSONWriter&, As...)> {
+ constexpr static size_t scArity = sizeof...(As);
+ using TupleType =
+ std::tuple<std::remove_cv_t<std::remove_reference_t<As>>...>;
+
+ // Serialization function that takes the exact same parameter types
+ // (const-ref'd) as `StreamJSONMarkerData`. This has to be inside the helper
+ // because only here can we access the raw parameter pack `As...`.
+ // And because we're using the same argument types through
+ // references-to-const, permitted implicit conversions can happen.
+ static ProfileBufferBlockIndex Serialize(
+ ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+ const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+ Streaming::DeserializerTag aDeserializerTag, const As&... aAs) {
+ // Note that options are first after the entry kind, because they contain
+ // the thread id, which is handled first to filter markers by threads.
+ return aBuffer.PutObjects(ProfileBufferEntryKind::Marker, aOptions, aName,
+ aCategory, aDeserializerTag,
+ MarkerPayloadType::Cpp, aAs...);
+ }
+};
+
+// Helper for a marker type.
+// A marker type is defined in a `struct` with some expected static member
+// functions. See example in BaseProfilerMarkers.h.
+template <typename MarkerType>
+struct MarkerTypeSerialization {
+ // Definitions to access the expected
+ // `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function
+ // and its parameters.
+ using StreamFunctionType =
+ StreamFunctionTypeHelper<decltype(MarkerType::StreamJSONMarkerData)>;
+ constexpr static size_t scStreamFunctionParameterCount =
+ StreamFunctionType::scArity;
+ using StreamFunctionUserParametersTuple =
+ typename StreamFunctionType::TupleType;
+ template <size_t i>
+ using StreamFunctionParameter =
+ std::tuple_element_t<i, StreamFunctionUserParametersTuple>;
+
+ template <typename... Ts>
+ static ProfileBufferBlockIndex Serialize(ProfileChunkedBuffer& aBuffer,
+ const ProfilerString8View& aName,
+ const MarkerCategory& aCategory,
+ MarkerOptions&& aOptions,
+ const Ts&... aTs) {
+ static_assert(!std::is_same_v<MarkerType,
+ ::mozilla::baseprofiler::markers::NoPayload>,
+ "NoPayload should have been handled in the caller.");
+ // Register marker type functions, and get the tag for this deserializer.
+ // Note that the tag is stored in a function-static object, and this
+ // function is static in a templated struct, so there should only be one tag
+ // per MarkerType.
+ // Making the tag class-static may have been more efficient (to avoid a
+ // thread-safe init check at every call), but random global static
+ // initialization order would make it more complex to coordinate with
+ // `Streaming::TagForMarkerTypeFunctions()`, and also would add a (small)
+ // cost for everybody, even the majority of users not using the profiler.
+ static const Streaming::DeserializerTag tag =
+ Streaming::TagForMarkerTypeFunctions(Deserialize,
+ MarkerType::MarkerTypeName,
+ MarkerType::MarkerTypeDisplay);
+ return StreamFunctionType::Serialize(aBuffer, aName, aCategory,
+ std::move(aOptions), tag, aTs...);
+ }
+
+ private:
+ // This templated function will recursively deserialize each argument expected
+ // by `MarkerType::StreamJSONMarkerData()` on the stack, and call it at the
+ // end. E.g., for `StreamJSONMarkerData(int, char)`:
+ // - DeserializeArguments<0>(aER, aWriter) reads an int and calls:
+ // - DeserializeArguments<1>(aER, aWriter, const int&) reads a char and calls:
+ // - MarkerType::StreamJSONMarkerData(aWriter, const int&, const char&).
+ // Prototyping on godbolt showed that clang and gcc can flatten these
+ // recursive calls into one function with successive reads followed by the one
+ // stream call; tested up to 40 arguments: https://godbolt.org/z/5KeeM4
+ template <size_t i = 0, typename... Args>
+ static void DeserializeArguments(ProfileBufferEntryReader& aEntryReader,
+ baseprofiler::SpliceableJSONWriter& aWriter,
+ const Args&... aArgs) {
+ static_assert(sizeof...(Args) == i,
+ "We should have collected `i` arguments so far");
+ if constexpr (i < scStreamFunctionParameterCount) {
+ // Deserialize the i-th argument on this stack.
+ auto argument = aEntryReader.ReadObject<StreamFunctionParameter<i>>();
+ // Add our local argument to the next recursive call.
+ DeserializeArguments<i + 1>(aEntryReader, aWriter, aArgs..., argument);
+ } else {
+ // We've read all the arguments, finally call the `StreamJSONMarkerData`
+ // function, which should write the appropriate JSON elements for this
+ // marker type. Note that the MarkerType-specific "type" element is
+ // already written.
+ MarkerType::StreamJSONMarkerData(aWriter, aArgs...);
+ }
+ }
+
+ public:
+ static void Deserialize(ProfileBufferEntryReader& aEntryReader,
+ baseprofiler::SpliceableJSONWriter& aWriter) {
+ aWriter.StringProperty("type", MarkerType::MarkerTypeName());
+ DeserializeArguments(aEntryReader, aWriter);
+ }
+};
+
+template <>
+struct MarkerTypeSerialization<::mozilla::baseprofiler::markers::NoPayload> {
+ // Nothing! NoPayload has special handling avoiding payload work.
+};
+
+template <typename MarkerType, typename... Ts>
+static ProfileBufferBlockIndex AddMarkerWithOptionalStackToBuffer(
+ ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+ const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+ const Ts&... aTs) {
+ if constexpr (std::is_same_v<MarkerType,
+ ::mozilla::baseprofiler::markers::NoPayload>) {
+ static_assert(sizeof...(Ts) == 0,
+ "NoPayload does not accept any payload arguments.");
+ // Special case for NoPayload where there is a stack or inner window id:
+ // Because these options would be stored in the payload 'data' object, but
+ // there is no such object for NoPayload, we convert the marker to another
+ // type (without user fields in the 'data' object), so that the stack and/or
+ // inner window id are not lost.
+ // TODO: Remove this when bug 1646714 lands.
+ if (aOptions.Stack().GetChunkedBuffer() ||
+ !aOptions.InnerWindowId().IsUnspecified()) {
+ struct NoPayloadUserData {
+ static constexpr Span<const char> MarkerTypeName() {
+ return MakeStringSpan("NoPayloadUserData");
+ }
+ static void StreamJSONMarkerData(
+ baseprofiler::SpliceableJSONWriter& aWriter) {
+ // No user payload.
+ }
+ static mozilla::MarkerSchema MarkerTypeDisplay() {
+ using MS = mozilla::MarkerSchema;
+ MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+ // No user data to display.
+ return schema;
+ }
+ };
+ return MarkerTypeSerialization<NoPayloadUserData>::Serialize(
+ aBuffer, aName, aCategory, std::move(aOptions));
+ }
+
+ // Note that options are first after the entry kind, because they contain
+ // the thread id, which is handled first to filter markers by threads.
+ return aBuffer.PutObjects(
+ ProfileBufferEntryKind::Marker, std::move(aOptions), aName, aCategory,
+ base_profiler_markers_detail::Streaming::DeserializerTag(0));
+ } else {
+ return MarkerTypeSerialization<MarkerType>::Serialize(
+ aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+ }
+}
+
+// Pointer to a function that can capture a backtrace into the provided
+// `ProfileChunkedBuffer`, and returns true when successful.
+using OptionalBacktraceCaptureFunction = bool (*)(ProfileChunkedBuffer&,
+ StackCaptureOptions);
+
+// Use a pre-allocated and cleared chunked buffer in the main thread's
+// `AddMarkerToBuffer()`.
+// Null if not the main thread, or if profilers are not active.
+MFBT_API ProfileChunkedBuffer* GetClearedBufferForMainThreadAddMarker();
+// Called by the profiler(s) when starting/stopping. Safe to nest.
+MFBT_API void EnsureBufferForMainThreadAddMarker();
+MFBT_API void ReleaseBufferForMainThreadAddMarker();
+
+// Add a marker with the given name, options, and arguments to the given buffer.
+// Because this may be called from either Base or Gecko Profiler functions, the
+// appropriate backtrace-capturing function must also be provided.
+template <typename MarkerType, typename... Ts>
+ProfileBufferBlockIndex AddMarkerToBuffer(
+ ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+ const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+ OptionalBacktraceCaptureFunction aOptionalBacktraceCaptureFunction,
+ const Ts&... aTs) {
+ if (aOptions.ThreadId().IsUnspecified()) {
+ // If not yet specified, use the current thread, where the marker is added.
+ aOptions.Set(MarkerThreadId::CurrentThread());
+ }
+
+ if (aOptions.IsTimingUnspecified()) {
+ // If not yet specified, set timing to the instant the marker is added.
+ aOptions.Set(MarkerTiming::InstantNow());
+ }
+
+ StackCaptureOptions captureOptions = aOptions.Stack().CaptureOptions();
+ if (captureOptions != StackCaptureOptions::NoStack &&
+ // Backtrace capture function will be nullptr if the profiler
+ // NoMarkerStacks feature is set.
+ aOptionalBacktraceCaptureFunction != nullptr) {
+ // A capture was requested, let's attempt it here and now. This avoids a
+ // lot of allocations that would be necessary if capturing a backtrace
+ // separately.
+ // TODO reduce internal profiler stack levels, see bug 1659872.
+ auto CaptureStackAndAddMarker = [&](ProfileChunkedBuffer& aChunkedBuffer) {
+ aOptions.StackRef().UseRequestedBacktrace(
+ aOptionalBacktraceCaptureFunction(aChunkedBuffer, captureOptions)
+ ? &aChunkedBuffer
+ : nullptr);
+ // This call must be made from here, while chunkedBuffer is in scope.
+ return AddMarkerWithOptionalStackToBuffer<MarkerType>(
+ aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+ };
+
+ if (ProfileChunkedBuffer* buffer = GetClearedBufferForMainThreadAddMarker();
+ buffer) {
+ // Use a pre-allocated buffer for the main thread (because it's the most
+ // used thread, and most sensitive to overhead), so it's only allocated
+ // once. It could be null if this is not the main thread, or no profilers
+ // are currently active.
+ return CaptureStackAndAddMarker(*buffer);
+ }
+ // TODO use a local on-stack byte buffer to remove last allocation.
+ ProfileBufferChunkManagerSingle chunkManager(
+ ProfileBufferChunkManager::scExpectedMaximumStackSize);
+ ProfileChunkedBuffer chunkedBuffer(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
+ return CaptureStackAndAddMarker(chunkedBuffer);
+ }
+
+ return AddMarkerWithOptionalStackToBuffer<MarkerType>(
+ aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+}
+
+// Assuming aEntryReader points right after the entry type (being Marker), this
+// reads the remainder of the marker and outputs it.
+// - GetWriterForThreadCallback, called first, after the thread id is read:
+// (ThreadId) -> SpliceableJSONWriter* or null
+// If null, nothing will be output, but aEntryReader will still be read fully.
+// - StackCallback, only called if GetWriterForThreadCallback didn't return
+// null, and if the marker contains a stack:
+// (ProfileChunkedBuffer&) -> void
+// - RustMarkerCallback, only called if GetWriterForThreadCallback didn't return
+// null, and if the marker contains a Rust payload:
+// (DeserializerTag) -> void
+template <typename GetWriterForThreadCallback, typename StackCallback,
+ typename RustMarkerCallback>
+void DeserializeAfterKindAndStream(
+ ProfileBufferEntryReader& aEntryReader,
+ GetWriterForThreadCallback&& aGetWriterForThreadCallback,
+ StackCallback&& aStackCallback, RustMarkerCallback&& aRustMarkerCallback) {
+ // Each entry is made up of the following:
+ // ProfileBufferEntry::Kind::Marker, <- already read by caller
+ // options, <- next location in entries
+ // name, category, deserializer tag,
+ // payload type and payload (only present if the tag is non-zero)
+ const MarkerOptions options = aEntryReader.ReadObject<MarkerOptions>();
+
+ baseprofiler::SpliceableJSONWriter* writer =
+ std::forward<GetWriterForThreadCallback>(aGetWriterForThreadCallback)(
+ options.ThreadId().ThreadId());
+ if (!writer) {
+ // No writer associated with this thread id, drop it.
+ aEntryReader.SetRemainingBytes(0);
+ return;
+ }
+
+ // Write the information to JSON with the following schema:
+ // [name, startTime, endTime, phase, category, data]
+ writer->StartArrayElement();
+ {
+ writer->UniqueStringElement(aEntryReader.ReadObject<ProfilerString8View>());
+
+ const double startTime = options.Timing().GetStartTime();
+ writer->TimeDoubleMsElement(startTime);
+
+ const double endTime = options.Timing().GetEndTime();
+ writer->TimeDoubleMsElement(endTime);
+
+ writer->IntElement(static_cast<int64_t>(options.Timing().MarkerPhase()));
+
+ MarkerCategory category = aEntryReader.ReadObject<MarkerCategory>();
+ writer->IntElement(static_cast<int64_t>(category.GetCategory()));
+
+ if (const auto tag =
+ aEntryReader.ReadObject<mozilla::base_profiler_markers_detail::
+ Streaming::DeserializerTag>();
+ tag != 0) {
+ writer->StartObjectElement();
+ {
+ // Stream "common props".
+
+ // TODO: Move this to top-level tuple, when frontend supports it.
+ if (!options.InnerWindowId().IsUnspecified()) {
+ // Here, we are converting uint64_t to double. Both Browsing Context
+ // and Inner Window IDs are created using
+ // `nsContentUtils::GenerateProcessSpecificId`, which is specifically
+ // designed to only use 53 of the 64 bits to be lossless when passed
+ // into and out of JS as a double.
+ writer->DoubleProperty(
+ "innerWindowID",
+ static_cast<double>(options.InnerWindowId().Id()));
+ }
+
+ // TODO: Move this to top-level tuple, when frontend supports it.
+ if (ProfileChunkedBuffer* chunkedBuffer =
+ options.Stack().GetChunkedBuffer();
+ chunkedBuffer) {
+ writer->StartObjectProperty("stack");
+ { std::forward<StackCallback>(aStackCallback)(*chunkedBuffer); }
+ writer->EndObject();
+ }
+
+ auto payloadType = static_cast<mozilla::MarkerPayloadType>(
+ aEntryReader.ReadObject<
+ std::underlying_type_t<mozilla::MarkerPayloadType>>());
+
+ // Stream the payload, including the type.
+ switch (payloadType) {
+ case mozilla::MarkerPayloadType::Cpp: {
+ mozilla::base_profiler_markers_detail::Streaming::
+ MarkerDataDeserializer deserializer =
+ mozilla::base_profiler_markers_detail::Streaming::
+ DeserializerForTag(tag);
+ MOZ_RELEASE_ASSERT(deserializer);
+ deserializer(aEntryReader, *writer);
+ MOZ_ASSERT(aEntryReader.RemainingBytes() == 0u);
+ break;
+ }
+ case mozilla::MarkerPayloadType::Rust:
+ std::forward<RustMarkerCallback>(aRustMarkerCallback)(tag);
+ MOZ_ASSERT(aEntryReader.RemainingBytes() == 0u);
+ break;
+ default:
+ MOZ_ASSERT_UNREACHABLE("Unknown payload type.");
+ break;
+ }
+ }
+ writer->EndObject();
+ }
+ }
+ writer->EndArray();
+ MOZ_ASSERT(aEntryReader.RemainingBytes() == 0u);
+}
+
+} // namespace mozilla::base_profiler_markers_detail
+
+namespace mozilla {
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: ProfilerStringView<CHAR>
+
+// The serialization starts with a ULEB128 number that encodes both whether the
+// ProfilerStringView is literal (Least Significant Bit = 0) or not (LSB = 1),
+// plus the string length (excluding null terminator) in bytes, shifted left by
+// 1 bit. Following that number:
+// - If literal, the string pointer value.
+// - If non-literal, the contents as bytes (excluding null terminator if any).
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<ProfilerStringView<CHAR>> {
+ static Length Bytes(const ProfilerStringView<CHAR>& aString) {
+ MOZ_RELEASE_ASSERT(
+ aString.Length() < std::numeric_limits<Length>::max() / 2,
+ "Double the string length doesn't fit in Length type");
+ const Length stringLength = static_cast<Length>(aString.Length());
+ if (aString.IsLiteral()) {
+ // Literal -> Length shifted left and LSB=0, then pointer.
+ return ULEB128Size(stringLength << 1 | 0u) +
+ static_cast<ProfileChunkedBuffer::Length>(sizeof(const CHAR*));
+ }
+ // Non-literal -> Length shifted left and LSB=1, then string size in bytes.
+ return ULEB128Size((stringLength << 1) | 1u) + stringLength * sizeof(CHAR);
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const ProfilerStringView<CHAR>& aString) {
+ MOZ_RELEASE_ASSERT(
+ aString.Length() < std::numeric_limits<Length>::max() / 2,
+ "Double the string length doesn't fit in Length type");
+ const Span<const CHAR> span = aString;
+ if (aString.IsLiteral()) {
+ // Literal -> Length shifted left and LSB=0, then pointer.
+ aEW.WriteULEB128(span.Length() << 1 | 0u);
+ aEW.WriteObject(WrapProfileBufferRawPointer(span.Elements()));
+ return;
+ }
+ // Non-literal -> Length shifted left and LSB=1, then the string bytes.
+ aEW.WriteULEB128(span.Length() << 1 | 1u);
+ aEW.WriteBytes(span.Elements(), span.LengthBytes());
+ }
+};
+
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<ProfilerStringView<CHAR>> {
+ static void ReadInto(ProfileBufferEntryReader& aER,
+ ProfilerStringView<CHAR>& aString) {
+ aString = Read(aER);
+ }
+
+ static ProfilerStringView<CHAR> Read(ProfileBufferEntryReader& aER) {
+ const Length lengthAndIsLiteral = aER.ReadULEB128<Length>();
+ const Length stringLength = lengthAndIsLiteral >> 1;
+ if ((lengthAndIsLiteral & 1u) == 0u) {
+ // LSB==0 -> Literal string, read the string pointer.
+ return ProfilerStringView<CHAR>(
+ aER.ReadObject<const CHAR*>(), stringLength,
+ ProfilerStringView<CHAR>::Ownership::Literal);
+ }
+ // LSB==1 -> Not a literal string.
+ ProfileBufferEntryReader::DoubleSpanOfConstBytes spans =
+ aER.ReadSpans(stringLength * sizeof(CHAR));
+ if (MOZ_LIKELY(spans.IsSingleSpan()) &&
+ reinterpret_cast<uintptr_t>(spans.mFirstOrOnly.Elements()) %
+ alignof(CHAR) ==
+ 0u) {
+ // Only a single span, correctly aligned for the CHAR type, we can just
+ // refer to it directly, assuming that this ProfilerStringView will not
+ // outlive the chunk.
+ return ProfilerStringView<CHAR>(
+ reinterpret_cast<const CHAR*>(spans.mFirstOrOnly.Elements()),
+ stringLength, ProfilerStringView<CHAR>::Ownership::Reference);
+ } else {
+ // Two spans, we need to concatenate them; or one span, but misaligned.
+ // Allocate a buffer for the string (plus a null terminator, for safety),
+ // and give it to the ProfilerStringView. Note that this is a secret use
+ // of ProfilerStringView, which is intended to only be used between
+ // deserialization and JSON streaming.
+ CHAR* buffer = new CHAR[stringLength + 1];
+ spans.CopyBytesTo(buffer);
+ buffer[stringLength] = CHAR(0);
+ return ProfilerStringView<CHAR>(
+ buffer, stringLength,
+ ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView);
+ }
+ }
+};
+
+// Serializer, Deserializer: MarkerCategory
+
+// The serialization contains the category pair, encoded as one ULEB128 number.
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerCategory> {
+ static Length Bytes(const MarkerCategory& aCategory) {
+ return ULEB128Size(static_cast<uint32_t>(aCategory.CategoryPair()));
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const MarkerCategory& aCategory) {
+ aEW.WriteULEB128(static_cast<uint32_t>(aCategory.CategoryPair()));
+ }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerCategory> {
+ static void ReadInto(ProfileBufferEntryReader& aER,
+ MarkerCategory& aCategory) {
+ aCategory = Read(aER);
+ }
+
+ static MarkerCategory Read(ProfileBufferEntryReader& aER) {
+ return MarkerCategory(static_cast<baseprofiler::ProfilingCategoryPair>(
+ aER.ReadULEB128<uint32_t>()));
+ }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerTiming
+
+// The serialization starts with the marker phase, followed by one or two
+// timestamps as needed.
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerTiming> {
+ static Length Bytes(const MarkerTiming& aTiming) {
+ MOZ_ASSERT(!aTiming.IsUnspecified());
+ const auto phase = aTiming.MarkerPhase();
+ switch (phase) {
+ case MarkerTiming::Phase::Instant:
+ return SumBytes(phase, aTiming.StartTime());
+ case MarkerTiming::Phase::Interval:
+ return SumBytes(phase, aTiming.StartTime(), aTiming.EndTime());
+ case MarkerTiming::Phase::IntervalStart:
+ return SumBytes(phase, aTiming.StartTime());
+ case MarkerTiming::Phase::IntervalEnd:
+ return SumBytes(phase, aTiming.EndTime());
+ default:
+ MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+ phase == MarkerTiming::Phase::Interval ||
+ phase == MarkerTiming::Phase::IntervalStart ||
+ phase == MarkerTiming::Phase::IntervalEnd);
+ return 0; // Only to avoid build errors.
+ }
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const MarkerTiming& aTiming) {
+ MOZ_ASSERT(!aTiming.IsUnspecified());
+ const auto phase = aTiming.MarkerPhase();
+ switch (phase) {
+ case MarkerTiming::Phase::Instant:
+ aEW.WriteObjects(phase, aTiming.StartTime());
+ return;
+ case MarkerTiming::Phase::Interval:
+ aEW.WriteObjects(phase, aTiming.StartTime(), aTiming.EndTime());
+ return;
+ case MarkerTiming::Phase::IntervalStart:
+ aEW.WriteObjects(phase, aTiming.StartTime());
+ return;
+ case MarkerTiming::Phase::IntervalEnd:
+ aEW.WriteObjects(phase, aTiming.EndTime());
+ return;
+ default:
+ MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+ phase == MarkerTiming::Phase::Interval ||
+ phase == MarkerTiming::Phase::IntervalStart ||
+ phase == MarkerTiming::Phase::IntervalEnd);
+ return;
+ }
+ }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerTiming> {
+ static void ReadInto(ProfileBufferEntryReader& aER, MarkerTiming& aTiming) {
+ aTiming.mPhase = aER.ReadObject<MarkerTiming::Phase>();
+ switch (aTiming.mPhase) {
+ case MarkerTiming::Phase::Instant:
+ aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+ aTiming.mEndTime = TimeStamp{};
+ break;
+ case MarkerTiming::Phase::Interval:
+ aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+ aTiming.mEndTime = aER.ReadObject<TimeStamp>();
+ break;
+ case MarkerTiming::Phase::IntervalStart:
+ aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+ aTiming.mEndTime = TimeStamp{};
+ break;
+ case MarkerTiming::Phase::IntervalEnd:
+ aTiming.mStartTime = TimeStamp{};
+ aTiming.mEndTime = aER.ReadObject<TimeStamp>();
+ break;
+ default:
+ MOZ_RELEASE_ASSERT(aTiming.mPhase == MarkerTiming::Phase::Instant ||
+ aTiming.mPhase == MarkerTiming::Phase::Interval ||
+ aTiming.mPhase ==
+ MarkerTiming::Phase::IntervalStart ||
+ aTiming.mPhase == MarkerTiming::Phase::IntervalEnd);
+ break;
+ }
+ }
+
+ static MarkerTiming Read(ProfileBufferEntryReader& aER) {
+ TimeStamp start;
+ TimeStamp end;
+ auto phase = aER.ReadObject<MarkerTiming::Phase>();
+ switch (phase) {
+ case MarkerTiming::Phase::Instant:
+ start = aER.ReadObject<TimeStamp>();
+ break;
+ case MarkerTiming::Phase::Interval:
+ start = aER.ReadObject<TimeStamp>();
+ end = aER.ReadObject<TimeStamp>();
+ break;
+ case MarkerTiming::Phase::IntervalStart:
+ start = aER.ReadObject<TimeStamp>();
+ break;
+ case MarkerTiming::Phase::IntervalEnd:
+ end = aER.ReadObject<TimeStamp>();
+ break;
+ default:
+ MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+ phase == MarkerTiming::Phase::Interval ||
+ phase == MarkerTiming::Phase::IntervalStart ||
+ phase == MarkerTiming::Phase::IntervalEnd);
+ break;
+ }
+ return MarkerTiming(start, end, phase);
+ }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerStack
+
+// The serialization only contains the `ProfileChunkedBuffer` from the
+// backtrace; if there is no backtrace or if it's empty, this will implicitly
+// store a nullptr (see
+// `ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*>`).
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerStack> {
+ static Length Bytes(const MarkerStack& aStack) {
+ return SumBytes(aStack.GetChunkedBuffer());
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW, const MarkerStack& aStack) {
+ aEW.WriteObject(aStack.GetChunkedBuffer());
+ }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerStack> {
+ static void ReadInto(ProfileBufferEntryReader& aER, MarkerStack& aStack) {
+ aStack = Read(aER);
+ }
+
+ static MarkerStack Read(ProfileBufferEntryReader& aER) {
+ return MarkerStack(aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>());
+ }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerOptions
+
+// The serialization contains all members (either trivially-copyable, or they
+// provide their specialization above).
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerOptions> {
+ static Length Bytes(const MarkerOptions& aOptions) {
+ return SumBytes(aOptions.ThreadId(), aOptions.Timing(), aOptions.Stack(),
+ aOptions.InnerWindowId());
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const MarkerOptions& aOptions) {
+ aEW.WriteObjects(aOptions.ThreadId(), aOptions.Timing(), aOptions.Stack(),
+ aOptions.InnerWindowId());
+ }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerOptions> {
+ static void ReadInto(ProfileBufferEntryReader& aER, MarkerOptions& aOptions) {
+ aER.ReadIntoObjects(aOptions.mThreadId, aOptions.mTiming, aOptions.mStack,
+ aOptions.mInnerWindowId);
+ }
+
+ static MarkerOptions Read(ProfileBufferEntryReader& aER) {
+ MarkerOptions options;
+ ReadInto(aER, options);
+ return options;
+ }
+};
+
+} // namespace mozilla
+
+#endif // BaseProfilerMarkersDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h
new file mode 100644
index 0000000000..729e17ab0b
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h
@@ -0,0 +1,968 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains basic definitions required to create marker types, and
+// to add markers to the profiler buffers.
+//
+// In most cases, #include "mozilla/BaseProfilerMarkers.h" instead, or
+// #include "mozilla/BaseProfilerMarkerTypes.h" for common marker types.
+
+#ifndef BaseProfilerMarkersPrerequisites_h
+#define BaseProfilerMarkersPrerequisites_h
+
+namespace mozilla {
+
+// How (and whether) a stack should be captured. Values of this type are
+// accepted by MarkerStack::Capture() and MarkerStack::WithCaptureOptions().
+enum class StackCaptureOptions {
+ NoStack, // No stack captured.
+ Full, // Capture a full stack, including label frames, JS frames and
+ // native frames.
+ NonNative, // Capture a stack without native frames for reduced overhead.
+};
+
+}
+
+#include "BaseProfileJSONWriter.h"
+#include "BaseProfilingCategory.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+#include "mozilla/BaseProfilerState.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Variant.h"
+
+#include <initializer_list>
+#include <string_view>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace mozilla {
+
+// Return a plain `const CHAR*` pointing at the literal empty string (`""` for
+// char, `u""` for char16_t). Any other CHAR type is rejected at compile time.
+template <typename CHAR>
+constexpr const CHAR* LiteralEmptyStringPointer() {
+  constexpr bool isChar8 = std::is_same_v<CHAR, char>;
+  constexpr bool isChar16 = std::is_same_v<CHAR, char16_t>;
+  static_assert(isChar8 || isChar16,
+                "Only char and char16_t are supported in Firefox");
+  if constexpr (isChar8) {
+    return "";
+  } else if constexpr (isChar16) {
+    return u"";
+  }
+}
+
+// Return a std::basic_string_view<CHAR> of length 0 that points at the literal
+// empty string. Only char and char16_t are accepted.
+template <typename CHAR>
+constexpr std::basic_string_view<CHAR> LiteralEmptyStringView() {
+  constexpr bool isChar8 = std::is_same_v<CHAR, char>;
+  constexpr bool isChar16 = std::is_same_v<CHAR, char16_t>;
+  static_assert(isChar8 || isChar16,
+                "Only char and char16_t are supported in Firefox");
+  // Bring `operator""sv()` from <string_view> into scope.
+  using namespace std::literals::string_view_literals;
+  if constexpr (isChar8) {
+    return ""sv;
+  } else if constexpr (isChar16) {
+    return u""sv;
+  }
+}
+
+// General string view, optimized for short on-stack life before serialization,
+// and between deserialization and JSON-streaming.
+template <typename CHAR>
+class MOZ_STACK_CLASS ProfilerStringView {
+ public:
+ // Default constructor points at "" (literal empty string).
+ constexpr ProfilerStringView() = default;
+
+ // Don't allow copy.
+ ProfilerStringView(const ProfilerStringView&) = delete;
+ ProfilerStringView& operator=(const ProfilerStringView&) = delete;
+
+ // Allow move. The view and its ownership kind are taken from `aOther`.
+ // Only when `aOther` owned its buffer (OwnedThroughStringView) is `aOther`
+ // reset to the literal "", so that a single object ends up deleting the
+ // buffer. Literal and reference views are left unchanged in `aOther`.
+ constexpr ProfilerStringView(ProfilerStringView&& aOther)
+ : mStringView(std::move(aOther.mStringView)),
+ mOwnership(aOther.mOwnership) {
+ if (mOwnership == Ownership::OwnedThroughStringView) {
+ // We now own the buffer, make the other point at the literal "".
+ aOther.mStringView = LiteralEmptyStringView<CHAR>();
+ aOther.mOwnership = Ownership::Literal;
+ }
+ }
+ // Move-assignment: take over the view and ownership kind of `aOther`.
+ // - Self-assignment is a no-op; otherwise an owned buffer would be orphaned
+ //   when "the other" object gets reset to "".
+ // - If this object currently owns a buffer, that buffer is released before
+ //   being replaced (same `delete` form as the destructor), so it cannot leak.
+ // - If `aOther` owned its buffer, `aOther` is reset to the literal "" so that
+ //   only this object will delete the buffer.
+ // Returns `*this`.
+ constexpr ProfilerStringView& operator=(ProfilerStringView&& aOther) {
+   if (this == &aOther) {
+     return *this;
+   }
+   if (MOZ_UNLIKELY(mOwnership == Ownership::OwnedThroughStringView)) {
+     // We are about to stop pointing at a buffer we own: destroy it now.
+     // This must stay in sync with the destructor.
+     delete mStringView.data();
+   }
+   mStringView = std::move(aOther.mStringView);
+   mOwnership = aOther.mOwnership;
+   if (mOwnership == Ownership::OwnedThroughStringView) {
+     // We now own the buffer, make the other point at the literal "".
+     aOther.mStringView = LiteralEmptyStringView<CHAR>();
+     aOther.mOwnership = Ownership::Literal;
+   }
+   return *this;
+ }
+
+ // Destructor: frees the pointed-at buffer only when this object owns it
+ // (OwnedThroughStringView); literal and reference views free nothing.
+ // NOTE(review): this is a scalar `delete`. The allocation site (the
+ // deserializer, not visible here) must use a matching form -- confirm it is
+ // not `new CHAR[]`, which would require `delete[]`.
+ ~ProfilerStringView() {
+ if (MOZ_UNLIKELY(mOwnership == Ownership::OwnedThroughStringView)) {
+ // We own the buffer pointed at by mStringView, destroy it.
+ // This is only used between deserialization and streaming.
+ delete mStringView.data();
+ }
+ }
+
+ // Implicit construction from nullptr, points at "" (literal empty string).
+ constexpr MOZ_IMPLICIT ProfilerStringView(decltype(nullptr)) {}
+
+ // Implicit constructor from a literal string.
+ template <size_t Np1>
+ constexpr MOZ_IMPLICIT ProfilerStringView(const CHAR (&aLiteralString)[Np1])
+ : ProfilerStringView(aLiteralString, Np1 - 1, Ownership::Literal) {}
+
+ // Constructor from a non-literal string.
+ constexpr ProfilerStringView(const CHAR* aString, size_t aLength)
+ : ProfilerStringView(aString, aLength, Ownership::Reference) {}
+
+ // Implicit constructor from a string_view.
+ constexpr MOZ_IMPLICIT ProfilerStringView(
+ const std::basic_string_view<CHAR>& aStringView)
+ : ProfilerStringView(aStringView.data(), aStringView.length(),
+ Ownership::Reference) {}
+
+ // Implicit constructor from an expiring string_view. We assume that the
+ // pointed-at string will outlive this ProfilerStringView.
+ constexpr MOZ_IMPLICIT ProfilerStringView(
+ std::basic_string_view<CHAR>&& aStringView)
+ : ProfilerStringView(aStringView.data(), aStringView.length(),
+ Ownership::Reference) {}
+
+ // Implicit constructor from std::string.
+ constexpr MOZ_IMPLICIT ProfilerStringView(
+ const std::basic_string<CHAR>& aString)
+ : ProfilerStringView(aString.data(), aString.length(),
+ Ownership::Reference) {}
+
+ // Construction from a raw pointer to a null-terminated string.
+ // This is a named class-static function to make it more obvious where work is
+ // being done (to determine the string length), and encourage users to instead
+ // provide a length, if already known.
+ // TODO: Find callers and convert them to constructor instead if possible.
+ static constexpr ProfilerStringView WrapNullTerminatedString(
+ const CHAR* aString) {
+ return ProfilerStringView(
+ aString, aString ? std::char_traits<CHAR>::length(aString) : 0,
+ Ownership::Reference);
+ }
+
+ // Implicit constructor for an object with member functions `Data()`
+ // `Length()`, and `IsLiteral()`, common in xpcom strings.
+ template <
+ typename String,
+ typename DataReturnType = decltype(std::declval<const String>().Data()),
+ typename LengthReturnType =
+ decltype(std::declval<const String>().Length()),
+ typename IsLiteralReturnType =
+ decltype(std::declval<const String>().IsLiteral()),
+ typename =
+ std::enable_if_t<std::is_convertible_v<DataReturnType, const CHAR*> &&
+ std::is_integral_v<LengthReturnType> &&
+ std::is_same_v<IsLiteralReturnType, bool>>>
+ constexpr MOZ_IMPLICIT ProfilerStringView(const String& aString)
+ : ProfilerStringView(
+ static_cast<const CHAR*>(aString.Data()), aString.Length(),
+ aString.IsLiteral() ? Ownership::Literal : Ownership::Reference) {}
+
+ [[nodiscard]] constexpr const std::basic_string_view<CHAR>& StringView()
+ const {
+ return mStringView;
+ }
+
+ [[nodiscard]] constexpr size_t Length() const { return mStringView.length(); }
+
+ [[nodiscard]] constexpr bool IsLiteral() const {
+ return mOwnership == Ownership::Literal;
+ }
+ [[nodiscard]] constexpr bool IsReference() const {
+ return mOwnership == Ownership::Reference;
+ }
+ // No `IsOwned...()` because it's a secret, only used internally!
+
+ [[nodiscard]] Span<const CHAR> AsSpan() const {
+ return Span<const CHAR>(mStringView.data(), mStringView.length());
+ }
+ [[nodiscard]] operator Span<const CHAR>() const { return AsSpan(); }
+
+ private:
+ enum class Ownership { Literal, Reference, OwnedThroughStringView };
+
+ // Allow deserializer to store anything here.
+ friend ProfileBufferEntryReader::Deserializer<ProfilerStringView>;
+
+ constexpr ProfilerStringView(const CHAR* aString, size_t aLength,
+ Ownership aOwnership)
+ : mStringView(aString ? std::basic_string_view<CHAR>(aString, aLength)
+ : LiteralEmptyStringView<CHAR>()),
+ mOwnership(aString ? aOwnership : Ownership::Literal) {}
+
+ // String view to an outside string (literal or reference).
+ // We may actually own the pointed-at buffer, but it is only used internally
+ // between deserialization and JSON streaming.
+ std::basic_string_view<CHAR> mStringView = LiteralEmptyStringView<CHAR>();
+
+ Ownership mOwnership = Ownership::Literal;
+};
+
+using ProfilerString8View = ProfilerStringView<char>;
+using ProfilerString16View = ProfilerStringView<char16_t>;
+
+// Compulsory marker parameter carrying the category information. It wraps a
+// single ProfilingCategoryPair, which covers both super- and sub-category.
+class MarkerCategory {
+ public:
+  // Build from a category pair.
+  constexpr explicit MarkerCategory(
+      baseprofiler::ProfilingCategoryPair aCategoryPair)
+      : mCategoryPair(aCategoryPair) {}
+
+  // The category pair given at construction.
+  constexpr baseprofiler::ProfilingCategoryPair CategoryPair() const {
+    return mCategoryPair;
+  }
+
+  // The super-category, looked up in the info of the stored category pair.
+  baseprofiler::ProfilingCategory GetCategory() const {
+    const auto& pairInfo = GetProfilingCategoryPairInfo(mCategoryPair);
+    return pairInfo.mCategory;
+  }
+
+ private:
+  baseprofiler::ProfilingCategoryPair mCategoryPair =
+      baseprofiler::ProfilingCategoryPair::OTHER;
+};
+
+namespace baseprofiler::category {
+
+// Each category pair name constructs a MarkerCategory.
+// E.g.: mozilla::baseprofiler::category::OTHER_Profiling
+// Profiler macros will take the category name alone without namespace.
+// E.g.: `PROFILER_MARKER_UNTYPED("name", OTHER_Profiling)`
+#define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) \
+ static constexpr MarkerCategory name{ProfilingCategoryPair::name};
+#define CATEGORY_ENUM_END_CATEGORY
+MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+ CATEGORY_ENUM_SUBCATEGORY,
+ CATEGORY_ENUM_END_CATEGORY)
+#undef CATEGORY_ENUM_BEGIN_CATEGORY
+#undef CATEGORY_ENUM_SUBCATEGORY
+#undef CATEGORY_ENUM_END_CATEGORY
+
+// Import `MarkerCategory` into this namespace. This will allow using this type
+// dynamically in macros that prepend `::mozilla::baseprofiler::category::` to
+// the given category, e.g.:
+// `PROFILER_MARKER_UNTYPED("name", MarkerCategory(...))`
+using MarkerCategory = ::mozilla::MarkerCategory;
+
+} // namespace baseprofiler::category
+
+// The classes below are all embedded in a `MarkerOptions` object.
+class MarkerOptions;
+
+// Marker option holding a thread id.
+// A default-constructed object keeps the id unspecified; in that case the
+// add-marker call will use the current thread id at that time.
+class MarkerThreadId {
+ public:
+  // Leave the thread id unspecified.
+  constexpr MarkerThreadId() = default;
+
+  // Store the given thread id.
+  constexpr explicit MarkerThreadId(
+      baseprofiler::BaseProfilerThreadId aThreadId)
+      : mThreadId(aThreadId) {}
+
+  // Option set to the id of the calling thread.
+  static MarkerThreadId CurrentThread() {
+    const baseprofiler::BaseProfilerThreadId currentId =
+        baseprofiler::profiler_current_thread_id();
+    return MarkerThreadId(currentId);
+  }
+
+  // Option set to the main thread's id. This can be useful to record a marker
+  // from a possibly-unregistered thread, and display it in the main thread
+  // track.
+  static MarkerThreadId MainThread() {
+    const baseprofiler::BaseProfilerThreadId mainId =
+        baseprofiler::profiler_main_thread_id();
+    return MarkerThreadId(mainId);
+  }
+
+  // The stored thread id (possibly unspecified).
+  [[nodiscard]] constexpr baseprofiler::BaseProfilerThreadId ThreadId() const {
+    return mThreadId;
+  }
+
+  // True when no thread id has been stored.
+  [[nodiscard]] constexpr bool IsUnspecified() const {
+    const bool specified = mThreadId.IsSpecified();
+    return !specified;
+  }
+
+ private:
+  baseprofiler::BaseProfilerThreadId mThreadId;
+};
+
+// This marker option contains marker timing information.
+// This class encapsulates the logic for correctly storing a marker based on its
+// phase (instant, interval, interval start, or interval end).
+// Use the static methods to create the MarkerTiming. This is a transient object
+// that is being used to enforce the constraints of the combinations of the
+// data.
+class MarkerTiming {
+ public:
+ // The following static methods are used to create the MarkerTiming based on
+ // the type that it is.
+
+ static MarkerTiming InstantAt(const TimeStamp& aTime) {
+ MOZ_ASSERT(!aTime.IsNull(), "Time is null for an instant marker.");
+ return MarkerTiming{aTime, TimeStamp{}, MarkerTiming::Phase::Instant};
+ }
+
+ static MarkerTiming InstantNow() { return InstantAt(TimeStamp::Now()); }
+
+ static MarkerTiming Interval(const TimeStamp& aStartTime,
+ const TimeStamp& aEndTime) {
+ MOZ_ASSERT(!aStartTime.IsNull(),
+ "Start time is null for an interval marker.");
+ MOZ_ASSERT(!aEndTime.IsNull(), "End time is null for an interval marker.");
+ return MarkerTiming{aStartTime, aEndTime, MarkerTiming::Phase::Interval};
+ }
+
+ static MarkerTiming IntervalUntilNowFrom(const TimeStamp& aStartTime) {
+ return Interval(aStartTime, TimeStamp::Now());
+ }
+
+ static MarkerTiming IntervalStart(const TimeStamp& aTime = TimeStamp::Now()) {
+ MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval start marker.");
+ return MarkerTiming{aTime, TimeStamp{}, MarkerTiming::Phase::IntervalStart};
+ }
+
+ static MarkerTiming IntervalEnd(const TimeStamp& aTime = TimeStamp::Now()) {
+ MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker.");
+ return MarkerTiming{TimeStamp{}, aTime, MarkerTiming::Phase::IntervalEnd};
+ }
+
+ // Record `aTime` (default: now) as the end time of this timing.
+ // The phase becomes Interval when a start time is already present, and
+ // IntervalEnd otherwise.
+ void SetIntervalEnd(const TimeStamp& aTime = TimeStamp::Now()) {
+   MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker.");
+   const bool hasStart = !mStartTime.IsNull();
+   mEndTime = aTime;
+   mPhase = hasStart ? Phase::Interval : Phase::IntervalEnd;
+ }
+
+ [[nodiscard]] const TimeStamp& StartTime() const { return mStartTime; }
+ [[nodiscard]] const TimeStamp& EndTime() const { return mEndTime; }
+
+ enum class Phase : uint8_t {
+ Instant = 0,
+ Interval = 1,
+ IntervalStart = 2,
+ IntervalEnd = 3,
+ };
+
+ [[nodiscard]] Phase MarkerPhase() const {
+ MOZ_ASSERT(!IsUnspecified());
+ return mPhase;
+ }
+
+ // The following getter methods are used to put the value into the buffer for
+ // storage.
+ [[nodiscard]] double GetStartTime() const {
+ MOZ_ASSERT(!IsUnspecified());
+ // If mStartTime is null (e.g., for IntervalEnd), this will output 0.0 as
+ // expected.
+ return MarkerTiming::timeStampToDouble(mStartTime);
+ }
+
+ [[nodiscard]] double GetEndTime() const {
+ MOZ_ASSERT(!IsUnspecified());
+ // If mEndTime is null (e.g., for Instant or IntervalStart), this will
+ // output 0.0 as expected.
+ return MarkerTiming::timeStampToDouble(mEndTime);
+ }
+
+ [[nodiscard]] uint8_t GetPhase() const {
+ MOZ_ASSERT(!IsUnspecified());
+ return static_cast<uint8_t>(mPhase);
+ }
+
+ // This is a constructor for Rust FFI bindings. It must not be used outside of
+ // this! Please see the other static constructors above.
+ static void UnsafeConstruct(MarkerTiming* aMarkerTiming,
+ const TimeStamp& aStartTime,
+ const TimeStamp& aEndTime, Phase aPhase) {
+ new (aMarkerTiming) MarkerTiming{aStartTime, aEndTime, aPhase};
+ }
+
+ private:
+ friend ProfileBufferEntryWriter::Serializer<MarkerTiming>;
+ friend ProfileBufferEntryReader::Deserializer<MarkerTiming>;
+ friend MarkerOptions;
+
+ // Default timing leaves it internally "unspecified", serialization getters
+ // and add-marker functions will default to `InstantNow()`.
+ constexpr MarkerTiming() = default;
+
+ // This should only be used by internal profiler code.
+ [[nodiscard]] bool IsUnspecified() const {
+ return mStartTime.IsNull() && mEndTime.IsNull();
+ }
+
+ // Full constructor, used by static factory functions.
+ constexpr MarkerTiming(const TimeStamp& aStartTime, const TimeStamp& aEndTime,
+ Phase aPhase)
+ : mStartTime(aStartTime), mEndTime(aEndTime), mPhase(aPhase) {}
+
+ // Convert `time` to a number of milliseconds since process creation.
+ // A null timestamp yields 0; the Phase lets readers know not to use it.
+ static double timeStampToDouble(const TimeStamp& time) {
+   return time.IsNull()
+              ? 0
+              : (time - TimeStamp::ProcessCreation()).ToMilliseconds();
+ }
+
+ TimeStamp mStartTime;
+ TimeStamp mEndTime;
+ Phase mPhase = Phase::Instant;
+};
+
+// This marker option allows three cases:
+// - By default, no stacks are captured.
+// - The caller can request a stack capture, and the add-marker code will take
+// care of it in the most efficient way.
+// - The caller can still provide an existing backtrace, for cases where a
+// marker reports something that happened elsewhere.
+class MarkerStack {
+ public:
+ // Default constructor, no capture.
+ constexpr MarkerStack() = default;
+
+ // Disallow copy.
+ MarkerStack(const MarkerStack&) = delete;
+ MarkerStack& operator=(const MarkerStack&) = delete;
+
+ // Allow move.
+ MarkerStack(MarkerStack&& aOther)
+ : mCaptureOptions(aOther.mCaptureOptions),
+ mOptionalChunkedBufferStorage(
+ std::move(aOther.mOptionalChunkedBufferStorage)),
+ mChunkedBuffer(aOther.mChunkedBuffer) {
+ AssertInvariants();
+ aOther.Clear();
+ }
+ MarkerStack& operator=(MarkerStack&& aOther) {
+ mCaptureOptions = aOther.mCaptureOptions;
+ mOptionalChunkedBufferStorage =
+ std::move(aOther.mOptionalChunkedBufferStorage);
+ mChunkedBuffer = aOther.mChunkedBuffer;
+ AssertInvariants();
+ aOther.Clear();
+ return *this;
+ }
+
+ // Take ownership of a backtrace. If null or empty, equivalent to NoStack().
+ explicit MarkerStack(UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer)
+ : mOptionalChunkedBufferStorage(
+ (!aExternalChunkedBuffer || aExternalChunkedBuffer->IsEmpty())
+ ? nullptr
+ : std::move(aExternalChunkedBuffer)),
+ mChunkedBuffer(mOptionalChunkedBufferStorage.get()) {
+ AssertInvariants();
+ }
+
+ // Use an existing backtrace stored elsewhere, which the user must guarantee
+ // is alive during the add-marker call. If empty, equivalent to NoStack().
+ explicit MarkerStack(ProfileChunkedBuffer& aExternalChunkedBuffer)
+ : mChunkedBuffer(aExternalChunkedBuffer.IsEmpty()
+ ? nullptr
+ : &aExternalChunkedBuffer) {
+ AssertInvariants();
+ }
+
+ // Don't capture a stack in this marker.
+ static MarkerStack NoStack() {
+ return MarkerStack(StackCaptureOptions::NoStack);
+ }
+
+ // Capture a stack when adding this marker.
+ static MarkerStack Capture(
+ StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
+ // Actual capture will be handled inside profiler_add_marker.
+ return MarkerStack(aCaptureOptions);
+ }
+
+ // Request a (full) capture when `aDoCapture` is true, no stack otherwise.
+ // Handy to avoid long-winded ternaries at call sites.
+ static MarkerStack MaybeCapture(bool aDoCapture) {
+   if (!aDoCapture) {
+     return NoStack();
+   }
+   return Capture();
+ }
+
+ // Use an existing backtrace stored elsewhere, which the user must guarantee
+ // is alive during the add-marker call. If empty, equivalent to NoStack().
+ static MarkerStack UseBacktrace(
+ ProfileChunkedBuffer& aExternalChunkedBuffer) {
+ return MarkerStack(aExternalChunkedBuffer);
+ }
+
+ // Take ownership of a backtrace previously captured with
+ // `profiler_capture_backtrace()`. If null, equivalent to NoStack().
+ static MarkerStack TakeBacktrace(
+ UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer) {
+ return MarkerStack(std::move(aExternalChunkedBuffer));
+ }
+
+ // Construct with the given capture options.
+ static MarkerStack WithCaptureOptions(StackCaptureOptions aCaptureOptions) {
+ return MarkerStack(aCaptureOptions);
+ }
+
+ [[nodiscard]] StackCaptureOptions CaptureOptions() const {
+ return mCaptureOptions;
+ }
+
+ ProfileChunkedBuffer* GetChunkedBuffer() const { return mChunkedBuffer; }
+
+ // Fulfil a previous capture request with `aExternalChunkedBuffer`.
+ // Must only be called while a capture is requested (release-asserted). The
+ // request is cleared, and the buffer is pointed at only if it is non-null and
+ // non-empty; otherwise the result is equivalent to NoStack().
+ void UseRequestedBacktrace(ProfileChunkedBuffer* aExternalChunkedBuffer) {
+   MOZ_RELEASE_ASSERT(mCaptureOptions != StackCaptureOptions::NoStack);
+   mCaptureOptions = StackCaptureOptions::NoStack;
+   const bool hasContents =
+       aExternalChunkedBuffer && !aExternalChunkedBuffer->IsEmpty();
+   if (hasContents) {
+     // Only a non-empty buffer is worth keeping a pointer to.
+     mChunkedBuffer = aExternalChunkedBuffer;
+   }
+   AssertInvariants();
+ }
+
+ void Clear() {
+ mCaptureOptions = StackCaptureOptions::NoStack;
+ mOptionalChunkedBufferStorage.reset();
+ mChunkedBuffer = nullptr;
+ AssertInvariants();
+ }
+
+ private:
+ explicit MarkerStack(StackCaptureOptions aCaptureOptions)
+ : mCaptureOptions(aCaptureOptions) {
+ AssertInvariants();
+ }
+
+ // This should be called after every constructor and non-const function.
+ void AssertInvariants() const {
+#ifdef DEBUG
+ if (mCaptureOptions != StackCaptureOptions::NoStack) {
+ MOZ_ASSERT(!mOptionalChunkedBufferStorage,
+ "We should not hold a buffer when capture is requested");
+ MOZ_ASSERT(!mChunkedBuffer,
+ "We should not point at a buffer when capture is requested");
+ } else {
+ if (mOptionalChunkedBufferStorage) {
+ MOZ_ASSERT(mChunkedBuffer == mOptionalChunkedBufferStorage.get(),
+ "Non-null mOptionalChunkedBufferStorage must be pointed-at "
+ "by mChunkedBuffer");
+ }
+ if (mChunkedBuffer) {
+ MOZ_ASSERT(!mChunkedBuffer->IsEmpty(),
+ "Non-null mChunkedBuffer must not be empty");
+ }
+ }
+#endif // DEBUG
+ }
+
+ StackCaptureOptions mCaptureOptions = StackCaptureOptions::NoStack;
+
+ // Optional storage for the backtrace, in case it was captured before the
+ // add-marker call.
+ UniquePtr<ProfileChunkedBuffer> mOptionalChunkedBufferStorage;
+
+ // If not null, this points to the backtrace. It may point to a backtrace
+ // temporarily stored on the stack, or to mOptionalChunkedBufferStorage.
+ ProfileChunkedBuffer* mChunkedBuffer = nullptr;
+};
+
+// This marker option captures a given inner window id.
+// The value 0 (`scNoId`) is reserved to mean "unspecified".
+class MarkerInnerWindowId {
+ public:
+  // Default constructor, it leaves the id unspecified.
+  constexpr MarkerInnerWindowId() = default;
+
+  // Constructor with a specified inner window id.
+  constexpr explicit MarkerInnerWindowId(uint64_t i) : mInnerWindowId(i) {}
+
+  // Constructor with either specified inner window id or Nothing.
+  // Nothing is stored as the unspecified id.
+  constexpr explicit MarkerInnerWindowId(const Maybe<uint64_t>& i)
+      : mInnerWindowId(i.valueOr(scNoId)) {}
+
+  // Explicit option with unspecified id.
+  constexpr static MarkerInnerWindowId NoId() { return MarkerInnerWindowId{}; }
+
+  // True when the stored id equals the reserved "no id" value (this includes
+  // an explicitly-given id of 0).
+  // constexpr, like `Id()` below and like MarkerThreadId::IsUnspecified().
+  [[nodiscard]] constexpr bool IsUnspecified() const {
+    return mInnerWindowId == scNoId;
+  }
+
+  // The stored id; equal to 0 when unspecified.
+  [[nodiscard]] constexpr uint64_t Id() const { return mInnerWindowId; }
+
+ private:
+  // Reserved value meaning "no inner window id".
+  static constexpr uint64_t scNoId = 0;
+  uint64_t mInnerWindowId = scNoId;
+};
+
+// This class combines each of the possible marker options above.
+class MarkerOptions {
+ public:
+ // Constructor from individual options (including none).
+ // Implicit to allow `{}` and one option type as-is.
+ // Options that are not provided here are defaulted. In particular, timing
+ // defaults to `MarkerTiming::InstantNow()` when the marker is recorded.
+ // Each argument is forwarded, in order, to the matching `Set()` overload
+ // below. Those overloads take their option by rvalue reference, so arguments
+ // must be temporaries or `std::move`d; if the same option type is given more
+ // than once, the last one wins.
+ template <typename... Options>
+ MOZ_IMPLICIT MarkerOptions(Options&&... aOptions) {
+ // Fold expression: one `Set(...)` call per argument, left to right.
+ (Set(std::forward<Options>(aOptions)), ...);
+ }
+
+ // Disallow copy.
+ MarkerOptions(const MarkerOptions&) = delete;
+ MarkerOptions& operator=(const MarkerOptions&) = delete;
+
+ // Allow move.
+ MarkerOptions(MarkerOptions&&) = default;
+ MarkerOptions& operator=(MarkerOptions&&) = default;
+
+ // The embedded `MarkerTiming` hasn't been specified yet.
+ [[nodiscard]] bool IsTimingUnspecified() const {
+ return mTiming.IsUnspecified();
+ }
+
+ // Each option may be added in a chain by e.g.:
+ // `options.Set(MarkerThreadId(123)).Set(MarkerTiming::IntervalEnd())`.
+ // When passed to an add-marker function, it must be an rvalue, either created
+ // on the spot, or `std::move`d from storage, e.g.:
+ // `PROFILER_MARKER_UNTYPED("...", std::move(options).Set(...))`;
+ //
+ // Options can be read by their name (without "Marker"), e.g.: `o.ThreadId()`.
+ // Add "Ref" for a non-const reference, e.g.: `o.ThreadIdRef() = ...;`
+#define FUNCTIONS_ON_MEMBER(NAME) \
+ MarkerOptions& Set(Marker##NAME&& a##NAME)& { \
+ m##NAME = std::move(a##NAME); \
+ return *this; \
+ } \
+ \
+ MarkerOptions&& Set(Marker##NAME&& a##NAME)&& { \
+ m##NAME = std::move(a##NAME); \
+ return std::move(*this); \
+ } \
+ \
+ const Marker##NAME& NAME() const { return m##NAME; } \
+ \
+ Marker##NAME& NAME##Ref() { return m##NAME; }
+
+ FUNCTIONS_ON_MEMBER(ThreadId);
+ FUNCTIONS_ON_MEMBER(Timing);
+ FUNCTIONS_ON_MEMBER(Stack);
+ FUNCTIONS_ON_MEMBER(InnerWindowId);
+#undef FUNCTIONS_ON_MEMBER
+
+ private:
+ friend ProfileBufferEntryReader::Deserializer<MarkerOptions>;
+
+ MarkerThreadId mThreadId;
+ MarkerTiming mTiming;
+ MarkerStack mStack;
+ MarkerInnerWindowId mInnerWindowId;
+};
+
+} // namespace mozilla
+
+namespace mozilla::baseprofiler::markers {
+
+// Default marker payload types, with no extra information, not even a marker
+// type and payload. This is intended for label-only markers.
+struct NoPayload final {};
+
+} // namespace mozilla::baseprofiler::markers
+
+namespace mozilla {
+
+class JSONWriter;
+
+// This class collects all the information necessary to stream the JSON schema
+// that informs the front-end how to display a type of markers.
+// It will be created and populated in `MarkerTypeDisplay()` functions in each
+// marker type definition, see Add/Set functions.
+class MarkerSchema {
+ public:
+ // This is used to describe a C++ type that is expected to be specified to
+ // the marker and used in PayloadField. This type is the expected input type
+ // to the marker data.
+ enum class InputType {
+ Uint64,
+ Uint32,
+ Boolean,
+ CString,
+ String,
+ TimeStamp,
+ TimeDuration
+ };
+
+ enum class Location : unsigned {
+ MarkerChart,
+ MarkerTable,
+ // This adds markers to the main marker timeline in the header.
+ TimelineOverview,
+ // In the timeline, this is a section that breaks out markers that are
+ // related to memory. When memory counters are enabled, this is its own
+ // track, otherwise it is displayed with the main thread.
+ TimelineMemory,
+ // This adds markers to the IPC timeline area in the header.
+ TimelineIPC,
+ // This adds markers to the FileIO timeline area in the header.
+ TimelineFileIO,
+ // TODO - This is not supported yet.
+ StackChart
+ };
+
+ // Used as constructor parameter, to explicitly specify that the location (and
+ // other display options) are handled as a special case in the front-end.
+ // In this case, *no* schema will be output for this type.
+ struct SpecialFrontendLocation {};
+
+ enum class Format {
+ // ----------------------------------------------------
+ // String types.
+
+ // Show the URL, and handle PII sanitization
+ Url,
+ // Show the file path, and handle PII sanitization.
+ FilePath,
+ // Important, do not put URL or file path information here, as it will not
+ // be sanitized. Please be careful with including other types of PII here as
+ // well.
+ // e.g. "Label: Some String"
+ String,
+
+ // Show a string from a UniqueStringArray given an index in the profile.
+ // e.g. 1, given string table ["hello", "world"] will show "world"
+ UniqueString,
+
+ // ----------------------------------------------------
+ // Numeric types
+
+ // For time data that represents a duration of time.
+ // e.g. "Label: 5s, 5ms, 5μs"
+ Duration,
+ // Data that happened at a specific time, relative to the start of the
+ // profile. e.g. "Label: 15.5s, 20.5ms, 30.5μs"
+ Time,
+ // The following are alternatives to display a time only in a specific unit
+ // of time.
+ Seconds, // "Label: 5s"
+ Milliseconds, // "Label: 5ms"
+ Microseconds, // "Label: 5μs"
+ Nanoseconds, // "Label: 5ns"
+ // e.g. "Label: 5.55mb, 5 bytes, 312.5kb"
+ Bytes,
+ // This should be a value between 0 and 1.
+ // "Label: 50%"
+ Percentage,
+ // The integer should be used for generic representations of numbers.
+ // Do not use it for time information.
+ // "Label: 52, 5,323, 1,234,567"
+ Integer,
+ // The decimal should be used for generic representations of numbers.
+ // Do not use it for time information.
+ // "Label: 52.23, 0.0054, 123,456.78"
+ Decimal
+ };
+
+ // This represents groups of markers which MarkerTypes can expose to indicate
+ // what group they belong to (multiple groups are allowed combined in bitwise
+ // or). This is currently only used for ETW filtering. In the long run this
+ // should be generalized to gecko markers.
+ enum class ETWMarkerGroup : uint64_t {
+ Generic = 1,
+ UserMarkers = 1 << 1,
+ Memory = 1 << 2,
+ Scheduling = 1 << 3
+ };
+
+ // Flags which describe additional information for a PayloadField.
+ enum class PayloadFlags : uint32_t { None = 0, Searchable = 1 };
+
+ // This is one field of payload to be used for additional marker data.
+ struct PayloadField {
+ // Key identifying the marker.
+ const char* Key;
+ // Input type, this represents the data type specified.
+ InputType InputTy;
+ // Label, additional description.
+ const char* Label = nullptr;
+ // Format as written to the JSON.
+ Format Fmt = Format::String;
+ // Optional PayloadFlags.
+ PayloadFlags Flags = PayloadFlags::None;
+ };
+
+ enum class Searchable { NotSearchable, Searchable };
+ enum class GraphType { Line, Bar, FilledLine };
+ enum class GraphColor {
+ Blue,
+ Green,
+ Grey,
+ Ink,
+ Magenta,
+ Orange,
+ Purple,
+ Red,
+ Teal,
+ Yellow
+ };
+
+ // Marker schema, with a non-empty list of locations where markers should be
+ // shown.
+ // Tech note: Even though `aLocations` are templated arguments, they are
+ // assigned to an `enum class` object, so they can only be of that enum type.
+ template <typename... Locations>
+ explicit MarkerSchema(Location aLocation, Locations... aLocations)
+ : mLocations{aLocation, aLocations...} {}
+
+ // Alternative constructor for MarkerSchema.
+ explicit MarkerSchema(const mozilla::MarkerSchema::Location* aLocations,
+ size_t aLength)
+ : mLocations(aLocations, aLocations + aLength) {}
+
+ // Marker schema for types that have special frontend handling.
+ // Nothing else should be set in this case.
+ // Implicit to allow quick return from MarkerTypeDisplay functions.
+ MOZ_IMPLICIT MarkerSchema(SpecialFrontendLocation) {}
+
+ // Caller must specify location(s) or SpecialFrontendLocation above.
+ MarkerSchema() = delete;
+
+ // Optional labels in the marker chart, the chart tooltip, and the marker
+ // table. If not provided, the marker "name" will be used. The given string
+ // can contain element keys in braces to include data elements streamed by
+ // `StreamJSONMarkerData()`. E.g.: "This is {text}"
+
+#define LABEL_SETTER(name) \
+ MarkerSchema& Set##name(std::string a##name) { \
+ m##name = std::move(a##name); \
+ return *this; \
+ }
+
+ LABEL_SETTER(ChartLabel)
+ LABEL_SETTER(TooltipLabel)
+ LABEL_SETTER(TableLabel)
+
+#undef LABEL_SETTER
+
+ // Use the same text for the chart label, the tooltip label and the table
+ // label. Returns `*this` to allow chaining.
+ // TODO: Move to a single "label" field once the front-end allows it.
+ MarkerSchema& SetAllLabels(std::string aText) {
+   mChartLabel = aText;
+   mTooltipLabel = aText;
+   // Last use of `aText`: hand its storage over instead of copying again.
+   mTableLabel = std::move(aText);
+   return *this;
+ }
+
+ // Each data element that is streamed by `StreamJSONMarkerData()` can be
+ // displayed as indicated by using one of the `Add...` function below.
+ // Each `Add...` will add a line in the full marker description. Parameters:
+ // - `aKey`: Element property name as streamed by `StreamJSONMarkerData()`.
+ // - `aLabel`: Optional prefix. Defaults to the key name.
+ // - `aFormat`: How to format the data element value, see `Format` above.
+ // - `aSearchable`: Optional, indicates if the value is used in searches,
+ // defaults to false.
+
+ MarkerSchema& AddKeyFormat(std::string aKey, Format aFormat) {
+ mData.emplace_back(mozilla::VariantType<DynamicData>{},
+ DynamicData{std::move(aKey), mozilla::Nothing{}, aFormat,
+ mozilla::Nothing{}});
+ return *this;
+ }
+
+ MarkerSchema& AddKeyLabelFormat(std::string aKey, std::string aLabel,
+ Format aFormat) {
+ mData.emplace_back(
+ mozilla::VariantType<DynamicData>{},
+ DynamicData{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat,
+ mozilla::Nothing{}});
+ return *this;
+ }
+
+ MarkerSchema& AddKeyFormatSearchable(std::string aKey, Format aFormat,
+ Searchable aSearchable) {
+ mData.emplace_back(mozilla::VariantType<DynamicData>{},
+ DynamicData{std::move(aKey), mozilla::Nothing{}, aFormat,
+ mozilla::Some(aSearchable)});
+ return *this;
+ }
+
+ MarkerSchema& AddKeyLabelFormatSearchable(std::string aKey,
+ std::string aLabel, Format aFormat,
+ Searchable aSearchable) {
+ mData.emplace_back(
+ mozilla::VariantType<DynamicData>{},
+ DynamicData{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat,
+ mozilla::Some(aSearchable)});
+ return *this;
+ }
+
+ // The display may also include static rows.
+
+ MarkerSchema& AddStaticLabelValue(std::string aLabel, std::string aValue) {
+ mData.emplace_back(mozilla::VariantType<StaticData>{},
+ StaticData{std::move(aLabel), std::move(aValue)});
+ return *this;
+ }
+
+ // Markers can be shown as timeline tracks.
+
+ // Append a graph entry for key `aKey` with graph type `aType` and no color.
+ // Returns `*this` to allow chaining.
+ MarkerSchema& AddChart(std::string aKey, GraphType aType) {
+   GraphData graph{std::move(aKey), aType, mozilla::Nothing{}};
+   mGraphs.push_back(std::move(graph));
+   return *this;
+ }
+
+ // Append a graph entry for key `aKey` with graph type `aType` and the given
+ // color `aColor`. Returns `*this` to allow chaining.
+ MarkerSchema& AddChartColor(std::string aKey, GraphType aType,
+                             GraphColor aColor) {
+   GraphData graph{std::move(aKey), aType, mozilla::Some(aColor)};
+   mGraphs.push_back(std::move(graph));
+   return *this;
+ }
+
+ // Internal streaming function.
+ MFBT_API void Stream(JSONWriter& aWriter, const Span<const char>& aName) &&;
+
+ private:
+ MFBT_API static Span<const char> LocationToStringSpan(Location aLocation);
+ MFBT_API static Span<const char> FormatToStringSpan(Format aFormat);
+ MFBT_API static Span<const char> GraphTypeToStringSpan(GraphType aType);
+ MFBT_API static Span<const char> GraphColorToStringSpan(GraphColor aColor);
+
+ // List of marker display locations. Empty for SpecialFrontendLocation.
+ std::vector<Location> mLocations;
+ // Labels for different places.
+ std::string mChartLabel;
+ std::string mTooltipLabel;
+ std::string mTableLabel;
+ // Main display, made of zero or more rows of key+label+format or label+value.
+ private:
+ struct DynamicData {
+ std::string mKey;
+ mozilla::Maybe<std::string> mLabel;
+ Format mFormat;
+ mozilla::Maybe<Searchable> mSearchable;
+ };
+ struct StaticData {
+ std::string mLabel;
+ std::string mValue;
+ };
+ using DataRow = mozilla::Variant<DynamicData, StaticData>;
+ using DataRowVector = std::vector<DataRow>;
+
+ DataRowVector mData;
+
+ struct GraphData {
+ std::string mKey;
+ GraphType mType;
+ mozilla::Maybe<GraphColor> mColor;
+ };
+ std::vector<GraphData> mGraphs;
+};
+
+} // namespace mozilla
+
+#endif // BaseProfilerMarkersPrerequisites_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerRAIIMacro.h b/mozglue/baseprofiler/public/BaseProfilerRAIIMacro.h
new file mode 100644
index 0000000000..b89f7e1f77
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerRAIIMacro.h
@@ -0,0 +1,15 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerRAIIMacro_h
+#define BaseProfilerRAIIMacro_h
+
+// Helpers for AUTO_PROFILER_* macros: PROFILER_RAII names a variable after its source line, so two uses on one line collide.
+#define PROFILER_RAII_PASTE(id, line) id##line  // Pastes the two tokens as given.
+#define PROFILER_RAII_EXPAND(id, line) PROFILER_RAII_PASTE(id, line)  // Extra level so `line` is macro-expanded before pasting.
+#define PROFILER_RAII PROFILER_RAII_EXPAND(raiiObject, __LINE__)  // Yields raiiObject<current line number>.
+
+#endif // BaseProfilerRAIIMacro_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h
new file mode 100644
index 0000000000..ba9eb67b62
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h
@@ -0,0 +1,177 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BASE_PROFILER_SHARED_LIBRARIES_H_
+#define BASE_PROFILER_SHARED_LIBRARIES_H_
+
+#include "BaseProfiler.h"
+
+#include <algorithm>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+
+class SharedLibrary {  // Value type: address range, offset, ids, names, paths, version and arch of one module.
+ public:
+ SharedLibrary(uintptr_t aStart, uintptr_t aEnd, uintptr_t aOffset,
+ const std::string& aBreakpadId, const std::string& aCodeId,
+ const std::string& aModuleName, const std::string& aModulePath,
+ const std::string& aDebugName, const std::string& aDebugPath,
+ const std::string& aVersion, const char* aArch)
+ : mStart(aStart),
+ mEnd(aEnd),
+ mOffset(aOffset),
+ mBreakpadId(aBreakpadId),
+ mCodeId(aCodeId),
+ mModuleName(aModuleName),
+ mModulePath(aModulePath),
+ mDebugName(aDebugName),
+ mDebugPath(aDebugPath),
+ mVersion(aVersion),
+ mArch(aArch ? aArch : "") {}  // std::string(nullptr) is undefined; store "" instead.
+
+ SharedLibrary(const SharedLibrary& aEntry)  // Member-wise copy.
+ : mStart(aEntry.mStart),
+ mEnd(aEntry.mEnd),
+ mOffset(aEntry.mOffset),
+ mBreakpadId(aEntry.mBreakpadId),
+ mCodeId(aEntry.mCodeId),
+ mModuleName(aEntry.mModuleName),
+ mModulePath(aEntry.mModulePath),
+ mDebugName(aEntry.mDebugName),
+ mDebugPath(aEntry.mDebugPath),
+ mVersion(aEntry.mVersion),
+ mArch(aEntry.mArch) {}
+
+ SharedLibrary& operator=(const SharedLibrary& aEntry) {
+ // Gracefully handle self assignment
+ if (this == &aEntry) return *this;
+
+ mStart = aEntry.mStart;
+ mEnd = aEntry.mEnd;
+ mOffset = aEntry.mOffset;
+ mBreakpadId = aEntry.mBreakpadId;
+ mCodeId = aEntry.mCodeId;
+ mModuleName = aEntry.mModuleName;
+ mModulePath = aEntry.mModulePath;
+ mDebugName = aEntry.mDebugName;
+ mDebugPath = aEntry.mDebugPath;
+ mVersion = aEntry.mVersion;
+ mArch = aEntry.mArch;
+ return *this;
+ }
+
+ bool operator==(const SharedLibrary& other) const {  // True only if every field matches.
+ return (mStart == other.mStart) && (mEnd == other.mEnd) &&
+ (mOffset == other.mOffset) && (mModuleName == other.mModuleName) &&
+ (mModulePath == other.mModulePath) &&
+ (mDebugName == other.mDebugName) &&
+ (mDebugPath == other.mDebugPath) &&
+ (mBreakpadId == other.mBreakpadId) && (mCodeId == other.mCodeId) &&
+ (mVersion == other.mVersion) && (mArch == other.mArch);
+ }
+
+ uintptr_t GetStart() const { return mStart; }
+ uintptr_t GetEnd() const { return mEnd; }
+ uintptr_t GetOffset() const { return mOffset; }
+ const std::string& GetBreakpadId() const { return mBreakpadId; }
+ const std::string& GetCodeId() const { return mCodeId; }
+ const std::string& GetModuleName() const { return mModuleName; }
+ const std::string& GetModulePath() const { return mModulePath; }
+ const std::string& GetDebugName() const { return mDebugName; }
+ const std::string& GetDebugPath() const { return mDebugPath; }
+ const std::string& GetVersion() const { return mVersion; }
+ const std::string& GetArch() const { return mArch; }
+
+ private:
+ SharedLibrary() : mStart{0}, mEnd{0}, mOffset{0} {}  // Private: not default-constructible from outside.
+
+ uintptr_t mStart;
+ uintptr_t mEnd;
+ uintptr_t mOffset;
+ std::string mBreakpadId;
+ // A string carrying an identifier for a binary.
+ //
+ // All platforms have different formats:
+ // - Windows: The code ID for a Windows PE file.
+ // It's the PE timestamp and PE image size.
+ // - macOS: The code ID for a macOS / iOS binary (mach-O).
+ // It's the mach-O UUID without dashes and without the trailing 0 for the
+ // breakpad ID.
+ // - Linux/Android: The code ID for a Linux ELF file.
+ // It's the complete build ID, as hex string.
+ std::string mCodeId;
+ std::string mModuleName;
+ std::string mModulePath;
+ std::string mDebugName;
+ std::string mDebugPath;
+ std::string mVersion;
+ std::string mArch;
+};
+
+inline bool CompareAddresses(const SharedLibrary& first,
+ const SharedLibrary& second) {
+ return first.GetStart() < second.GetStart();  // Orders by start address; `inline` because inline SortByAddress() names it from a header.
+}
+
+class SharedLibraryInfo {  // A list of SharedLibrary entries with lookup, sort and erase helpers.
+ public:
+#ifdef MOZ_GECKO_PROFILER
+ static SharedLibraryInfo GetInfoForSelf();
+# ifdef XP_WIN
+ static SharedLibraryInfo GetInfoFromPath(const wchar_t* aPath);
+# endif
+
+ static void Initialize();
+#else
+ static SharedLibraryInfo GetInfoForSelf() { return SharedLibraryInfo(); }  // Stub: empty list.
+# ifdef XP_WIN
+ static SharedLibraryInfo GetInfoFromPath(const wchar_t* aPath) {
+ return SharedLibraryInfo();
+ }
+# endif
+
+ static void Initialize() {}
+#endif
+
+ SharedLibraryInfo() {}
+
+ void AddSharedLibrary(const SharedLibrary& entry) { mEntries.push_back(entry); }  // By reference: one copy instead of two.
+
+ const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; }  // No bounds check.
+
+ SharedLibrary& GetMutableEntry(size_t i) { return mEntries[i]; }  // No bounds check.
+
+ // Removes items in the range [first, last)
+ // i.e. element at the "last" index is not removed
+ void RemoveEntries(size_t first, size_t last) {
+ mEntries.erase(mEntries.begin() + first, mEntries.begin() + last);
+ }
+
+ bool Contains(const SharedLibrary& searchItem) const {  // Linear search using SharedLibrary::operator==.
+ return (mEntries.end() !=
+ std::find(mEntries.begin(), mEntries.end(), searchItem));
+ }
+
+ size_t GetSize() const { return mEntries.size(); }
+
+ void SortByAddress() {  // Ascending by GetStart().
+ std::sort(mEntries.begin(), mEntries.end(), CompareAddresses);
+ }
+
+ void Clear() { mEntries.clear(); }
+
+ private:
+#ifdef XP_WIN
+ void AddSharedLibraryFromModuleInfo(const wchar_t* aModulePath,
+ mozilla::Maybe<HMODULE> aModule);
+#endif
+
+ std::vector<SharedLibrary> mEntries;
+};
+
+#endif // BASE_PROFILER_SHARED_LIBRARIES_H_
diff --git a/mozglue/baseprofiler/public/BaseProfilerState.h b/mozglue/baseprofiler/public/BaseProfilerState.h
new file mode 100644
index 0000000000..f9df8d2975
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerState.h
@@ -0,0 +1,412 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The Gecko Profiler is an always-on profiler that takes fast and low overhead
+// samples of the program execution using only userspace functionality for
+// portability. The goal of this module is to provide performance data in a
+// generic cross-platform way without requiring custom tools or kernel support.
+//
+// Samples are collected to form a timeline with optional timeline event
+// (markers) used for filtering. The samples include both native stacks and
+// platform-independent "label stack" frames.
+
+#ifndef BaseProfilerState_h
+#define BaseProfilerState_h
+
+// This header contains most functions that give information about the Base
+// Profiler: Whether it is active or not, paused, the selected features, and
+// some generic process and thread information.
+// It is safe to include unconditionally, but uses of structs and functions must
+// be guarded by `#ifdef MOZ_GECKO_PROFILER`.
+
+#include "mozilla/BaseProfilerUtils.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+# define AUTO_PROFILER_STATS(name)
+
+namespace mozilla::baseprofiler {
+
+[[nodiscard]] inline bool profiler_is_active() { return false; }
+[[nodiscard]] inline bool profiler_is_active_and_unpaused() { return false; }
+
+} // namespace mozilla::baseprofiler
+
+#else // !MOZ_GECKO_PROFILER
+
+# include "mozilla/Atomics.h"
+# include "mozilla/Maybe.h"
+
+# include <stdint.h>
+# include <string>
+
+// Uncomment the following line to display profiler runtime statistics at
+// shutdown.
+// # define PROFILER_RUNTIME_STATS
+
+# ifdef PROFILER_RUNTIME_STATS
+# include "mozilla/TimeStamp.h"
+# endif
+
+namespace mozilla::baseprofiler {
+
+# ifdef PROFILER_RUNTIME_STATS
+// This class gathers durations and displays some basic stats when destroyed.
+// It is intended to be used as a static variable (see `AUTO_PROFILER_STATS`
+// below), to display stats at the end of the program.
+class StaticBaseProfilerStats {
+ public:
+ explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {}  // Keeps the pointer, not a copy.
+
+ ~StaticBaseProfilerStats() {
+ // Print count, total and mean (total / count) plus the longest duration.
+ using ULL = unsigned long long;
+ ULL n = static_cast<ULL>(mNumberDurations);
+ if (n != 0) {
+ ULL sumNs = static_cast<ULL>(mSumDurationsNs);
+ printf(
+ "[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n",
+ int(profiler_current_process_id().ToNumber()), mName, sumNs, n,
+ sumNs / n, static_cast<ULL>(mLongestDurationNs));
+ } else {
+ printf("[%d] Profiler stats `%s`: (nothing)\n",
+ int(profiler_current_process_id().ToNumber()), mName);
+ }
+ }
+
+ void AddDurationFrom(TimeStamp aStart) {  // Records the time elapsed since aStart.
+ DurationNs duration = static_cast<DurationNs>(
+ (TimeStamp::Now() - aStart).ToMicroseconds() * 1000 + 0.5);  // us -> ns, rounded to nearest.
+ mSumDurationsNs += duration;
+ ++mNumberDurations;
+ // Update mLongestDurationNs if this one is longer.
+ for (;;) {
+ DurationNs longest = mLongestDurationNs;
+ if (MOZ_LIKELY(longest >= duration)) {
+ // This duration is not the longest, nothing to do.
+ break;
+ }
+ if (MOZ_LIKELY(mLongestDurationNs.compareExchange(longest, duration))) {
+ // Successfully updated `mLongestDurationNs` with the new value.
+ break;
+ }
+ // Otherwise someone else just updated `mLongestDurationNs`, we need to
+ // try again by looping.
+ }
+ }
+
+ private:
+ using DurationNs = uint64_t;  // Durations are stored as whole nanoseconds.
+ using Count = uint32_t;
+
+ Atomic<DurationNs> mSumDurationsNs{0};  // Sum of every added duration.
+ Atomic<DurationNs> mLongestDurationNs{0};  // Largest single added duration.
+ Atomic<Count> mNumberDurations{0};  // How many durations were added.
+ const char* mName;  // Label printed by the destructor.
+};
+
+// RAII object that measures its scoped lifetime duration and reports it to a
+// `StaticBaseProfilerStats`.
+class MOZ_RAII AutoProfilerStats {
+ public:
+ explicit AutoProfilerStats(StaticBaseProfilerStats& aStats)
+ : mStats(aStats), mStart(TimeStamp::Now()) {}  // Timing starts at construction.
+
+ ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); }  // Reports the elapsed time on scope exit.
+
+ private:
+ StaticBaseProfilerStats& mStats;  // Accumulator; held by reference, so it must outlive this object.
+ TimeStamp mStart;  // Timestamp taken in the constructor.
+};
+
+// Macro that should be used to collect basic statistics from measurements of
+// block durations, from where this macro is, until the end of its enclosing
+// scope. The name is used in the static variable name and when displaying stats
+// at the end of the program. Another location could use the same name, but the
+// stats will not be combined, so use different names if these locations should
+// be distinguished.
+# define AUTO_PROFILER_STATS(name) \
+ static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \
+ #name); \
+ ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name);
+
+# else // PROFILER_RUNTIME_STATS
+
+# define AUTO_PROFILER_STATS(name)
+
+# endif // PROFILER_RUNTIME_STATS else
+
+//---------------------------------------------------------------------------
+// Profiler features
+//---------------------------------------------------------------------------
+
+# if defined(__APPLE__) && defined(__aarch64__)
+# define POWER_HELP "Sample per process power use"
+# elif defined(__APPLE__) && defined(__x86_64__)
+# define POWER_HELP \
+ "Record the power used by the entire system with each sample."
+# elif defined(__linux__) && defined(__x86_64__)
+# define POWER_HELP \
+ "Record the power used by the entire system with each sample. " \
+ "Only available with Intel CPUs and requires setting " \
+ "the sysctl kernel.perf_event_paranoid to 0."
+# elif defined(_MSC_VER)
+# define POWER_HELP \
+ "Record the value of every energy meter available on the system with " \
+ "each sample. Only available on Windows 11 with Intel CPUs."
+# else
+# define POWER_HELP "Not supported on this platform."
+# endif
+
+// Higher-order macro containing all the feature info in one place. Define
+// |MACRO| appropriately to extract the relevant parts. Note that the number
+// values are used internally only and so can be changed without consequence.
+// Any changes to this list should also be applied to the feature list in
+// toolkit/components/extensions/schemas/geckoProfiler.json.
+// *** Synchronize with lists in ProfilerState.h and geckoProfiler.json ***
+# define BASE_PROFILER_FOR_EACH_FEATURE(MACRO) \
+ MACRO(0, "java", Java, "Profile Java code, Android only") \
+ \
+ MACRO(1, "js", JS, \
+ "Get the JS engine to expose the JS stack to the profiler") \
+ \
+ MACRO(2, "mainthreadio", MainThreadIO, "Add main thread file I/O") \
+ \
+ MACRO(3, "fileio", FileIO, \
+ "Add file I/O from all profiled threads, implies mainthreadio") \
+ \
+ MACRO(4, "fileioall", FileIOAll, \
+ "Add file I/O from all threads, implies fileio") \
+ \
+ MACRO(5, "nomarkerstacks", NoMarkerStacks, \
+ "Markers do not capture stacks, to reduce overhead") \
+ \
+ MACRO(6, "screenshots", Screenshots, \
+ "Take a snapshot of the window on every composition") \
+ \
+ MACRO(7, "seqstyle", SequentialStyle, \
+ "Disable parallel traversal in styling") \
+ \
+ MACRO(8, "stackwalk", StackWalk, \
+ "Walk the C++ stack, not available on all platforms") \
+ \
+ MACRO(9, "jsallocations", JSAllocations, \
+ "Have the JavaScript engine track allocations") \
+ \
+ MACRO(10, "nostacksampling", NoStackSampling, \
+ "Disable all stack sampling: Cancels \"js\", \"stackwalk\" and " \
+ "labels") \
+ \
+ MACRO(11, "nativeallocations", NativeAllocations, \
+ "Collect the stacks from a smaller subset of all native " \
+ "allocations, biasing towards collecting larger allocations") \
+ \
+ MACRO(12, "ipcmessages", IPCMessages, \
+ "Have the IPC layer track cross-process messages") \
+ \
+ MACRO(13, "audiocallbacktracing", AudioCallbackTracing, \
+ "Audio callback tracing") \
+ \
+ MACRO(14, "cpu", CPUUtilization, "CPU utilization") \
+ \
+ MACRO(15, "notimerresolutionchange", NoTimerResolutionChange, \
+ "Do not adjust the timer resolution for fast sampling, so that " \
+ "other Firefox timers do not get affected") \
+ \
+ MACRO(16, "cpuallthreads", CPUAllThreads, \
+ "Sample the CPU utilization of all registered threads") \
+ \
+ MACRO(17, "samplingallthreads", SamplingAllThreads, \
+ "Sample the stacks of all registered threads") \
+ \
+ MACRO(18, "markersallthreads", MarkersAllThreads, \
+ "Record markers from all registered threads") \
+ \
+ MACRO(19, "unregisteredthreads", UnregisteredThreads, \
+ "Discover and profile unregistered threads -- beware: expensive!") \
+ \
+ MACRO(20, "processcpu", ProcessCPU, \
+ "Sample the CPU utilization of each process") \
+ \
+ MACRO(21, "power", Power, POWER_HELP) \
+ \
+ MACRO(22, "cpufreq", CPUFrequency, \
+ "Record the clock frequency of " \
+ "every CPU core for every profiler sample.") \
+ \
+ MACRO(23, "bandwidth", Bandwidth, \
+ "Record the network bandwidth used for every profiler sample.")
+// *** Synchronize with lists in ProfilerState.h and geckoProfiler.json ***
+
+struct ProfilerFeature {
+# define DECLARE(n_, str_, Name_, desc_) \
+ static constexpr uint32_t Name_ = (1u << n_); \
+ [[nodiscard]] static constexpr bool Has##Name_(uint32_t aFeatures) { \
+ return (aFeatures & Name_) != 0u; \
+ } \
+ static constexpr void Set##Name_(uint32_t& aFeatures) { \
+ aFeatures = aFeatures | Name_; \
+ } \
+ static constexpr void Clear##Name_(uint32_t& aFeatures) { \
+ aFeatures = aFeatures & ~Name_; \
+ }
+
+ // Per feature: a single-bit mask constant, a Has* test, and Set*/Clear* mutators.
+ BASE_PROFILER_FOR_EACH_FEATURE(DECLARE)
+
+# undef DECLARE
+};
+
+namespace detail {
+
+// RacyFeatures is only defined in this header file so that its methods can
+// be inlined into profiler_is_active(). Please do not use anything from the
+// detail namespace outside the profiler.
+
+// Within the profiler's code, the preferred way to check profiler activeness
+// and features is via ActivePS(). However, that requires locking gPSMutex.
+// There are some hot operations where absolute precision isn't required, so we
+// duplicate the activeness/feature state in a lock-free manner in this class.
+class RacyFeatures {
+ public:
+ MFBT_API static void SetActive(uint32_t aFeatures);
+
+ MFBT_API static void SetInactive();
+
+ MFBT_API static void SetPaused();
+
+ MFBT_API static void SetUnpaused();
+
+ MFBT_API static void SetSamplingPaused();
+
+ MFBT_API static void SetSamplingUnpaused();
+
+ [[nodiscard]] MFBT_API static mozilla::Maybe<uint32_t> FeaturesIfActive() {
+ if (uint32_t af = sActiveAndFeatures; af & Active) {
+ // Active: strip the Active, Paused and SamplingPaused bits to get all features.
+ return Some(af & ~(Active | Paused | SamplingPaused));
+ }
+ return Nothing();
+ }
+
+ [[nodiscard]] MFBT_API static bool IsActive();
+
+ [[nodiscard]] MFBT_API static bool IsActiveWithFeature(uint32_t aFeature);
+
+ [[nodiscard]] MFBT_API static bool IsActiveWithoutFeature(uint32_t aFeature);
+
+ // True if profiler is active, and not fully paused.
+ // Note that periodic sampling *could* be paused!
+ [[nodiscard]] MFBT_API static bool IsActiveAndUnpaused();
+
+ // True if profiler is active, and sampling is not paused (through generic
+ // `SetPaused()` or specific `SetSamplingPaused()`).
+ [[nodiscard]] MFBT_API static bool IsActiveAndSamplingUnpaused();
+
+ private:
+ static constexpr uint32_t Active = 1u << 31;
+ static constexpr uint32_t Paused = 1u << 30;
+ static constexpr uint32_t SamplingPaused = 1u << 29;
+
+// Ensure Active/Paused/SamplingPaused don't overlap with any feature bit.
+# define NO_OVERLAP(n_, str_, Name_, desc_) \
+ static_assert((ProfilerFeature::Name_ & (Active | Paused | SamplingPaused)) == 0u, \
+ "bad feature value");
+
+ BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP);
+
+# undef NO_OVERLAP
+
+ // We combine the active bit with the feature bits so they can be read or
+ // written in a single atomic operation.
+ // TODO: Could this be MFBT_DATA for better inlining optimization?
+ static Atomic<uint32_t, MemoryOrdering::Relaxed> sActiveAndFeatures;
+};
+
+MFBT_API bool IsThreadBeingProfiled();
+
+} // namespace detail
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Is the profiler active? Note: the return value of this function can become
+// immediately out-of-date. E.g. the profile might be active but then
+// profiler_stop() is called immediately afterward. One common and reasonable
+// pattern of usage is the following:
+//
+// if (profiler_is_active()) {
+// ExpensiveData expensiveData = CreateExpensiveData();
+// PROFILER_OPERATION(expensiveData);
+// }
+//
+// where PROFILER_OPERATION is a no-op if the profiler is inactive. In this
+// case the profiler_is_active() check is just an optimization -- it prevents
+// us calling CreateExpensiveData() unnecessarily in most cases, but the
+// expensive data will end up being created but not used if another thread
+// stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION
+// calls.
+[[nodiscard]] inline bool profiler_is_active() {
+ return detail::RacyFeatures::IsActive();  // Lock-free snapshot; may be stale at once.
+}
+
+// Same as profiler_is_active(), but also checks if the profiler is not paused.
+[[nodiscard]] inline bool profiler_is_active_and_unpaused() {
+ return detail::RacyFeatures::IsActiveAndUnpaused();  // Forwards to RacyFeatures.
+}
+
+// Is the profiler active and unpaused, and is the current thread being
+// profiled? (Same caveats and recommended usage as profiler_is_active().)
+[[nodiscard]] inline bool profiler_thread_is_being_profiled() {
+ if (!detail::RacyFeatures::IsActiveAndUnpaused()) return false;  // Skip the per-thread check.
+ return detail::IsThreadBeingProfiled();
+}
+
+// Is the profiler active and paused? Returns false if the profiler is inactive.
+[[nodiscard]] MFBT_API bool profiler_is_paused();
+
+// Is the profiler active and sampling is paused? Returns false if the profiler
+// is inactive.
+[[nodiscard]] MFBT_API bool profiler_is_sampling_paused();
+
+// Is the current thread sleeping?
+[[nodiscard]] MFBT_API bool profiler_thread_is_sleeping();
+
+// Get all the features supported by the profiler that are accepted by
+// profiler_start(). The result is the same whether the profiler is active or
+// not.
+[[nodiscard]] MFBT_API uint32_t profiler_get_available_features();
+
+// Returns the full feature set if the profiler is active.
+// Note: the return value can become immediately out-of-date, much like the
+// return value of profiler_is_active().
+[[nodiscard]] inline mozilla::Maybe<uint32_t> profiler_features_if_active() {
+ return detail::RacyFeatures::FeaturesIfActive();  // Nothing() when the profiler is inactive.
+}
+
+// Check if a profiler feature (specified via the ProfilerFeature type) is
+// active. Returns false if the profiler is inactive. Note: the return value
+// can become immediately out-of-date, much like the return value of
+// profiler_is_active().
+[[nodiscard]] MFBT_API bool profiler_feature_active(uint32_t aFeature);
+
+// Check if the profiler is active without a feature (specified via the
+// ProfilerFeature type). Note: the return value can become immediately
+// out-of-date, much like the return value of profiler_is_active().
+[[nodiscard]] MFBT_API bool profiler_active_without_feature(uint32_t aFeature);
+
+// Returns true if any of the profiler mutexes are currently locked *on the
+// current thread*. This may be used by re-entrant code that may call profiler
+// functions while the same or a different profiler mutex is locked, which could
+// deadlock.
+[[nodiscard]] bool profiler_is_locked_on_current_thread();
+
+} // namespace mozilla::baseprofiler
+
+#endif // !MOZ_GECKO_PROFILER
+
+#endif // BaseProfilerState_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerUtils.h b/mozglue/baseprofiler/public/BaseProfilerUtils.h
new file mode 100644
index 0000000000..ab02e03b95
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerUtils.h
@@ -0,0 +1,227 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerUtils_h
+#define BaseProfilerUtils_h
+
+// This header contains most process- and thread-related functions.
+// It is safe to include unconditionally.
+
+// --------------------------------------------- WASI process & thread ids
+#if defined(__wasi__)
+
+namespace mozilla::baseprofiler::detail {
+using ProcessIdType = unsigned;
+using ThreadIdType = unsigned;
+} // namespace mozilla::baseprofiler::detail
+
+// --------------------------------------------- Windows process & thread ids
+#elif defined(XP_WIN)
+
+namespace mozilla::baseprofiler::detail {
+using ProcessIdType = int;
+using ThreadIdType = unsigned long;
+} // namespace mozilla::baseprofiler::detail
+
+// --------------------------------------------- Non-Windows process id
+#else
+// All non-Windows platforms are assumed to be POSIX, which has getpid().
+
+# include <unistd.h>
+namespace mozilla::baseprofiler::detail {
+using ProcessIdType = decltype(getpid());
+} // namespace mozilla::baseprofiler::detail
+
+// --------------------------------------------- Non-Windows thread id
+// ------------------------------------------------------- macOS
+# if defined(XP_MACOSX)
+
+namespace mozilla::baseprofiler::detail {
+using ThreadIdType = uint64_t;
+} // namespace mozilla::baseprofiler::detail
+
+// ------------------------------------------------------- Android
+// Test Android before Linux, because Linux includes Android.
+# elif defined(__ANDROID__) || defined(ANDROID)
+
+# include <sys/types.h>
+namespace mozilla::baseprofiler::detail {
+using ThreadIdType = decltype(gettid());
+} // namespace mozilla::baseprofiler::detail
+
+// ------------------------------------------------------- Linux
+# elif defined(XP_LINUX)
+
+namespace mozilla::baseprofiler::detail {
+using ThreadIdType = long;
+} // namespace mozilla::baseprofiler::detail
+
+// ------------------------------------------------------- FreeBSD
+# elif defined(XP_FREEBSD)
+
+namespace mozilla::baseprofiler::detail {
+using ThreadIdType = long;
+} // namespace mozilla::baseprofiler::detail
+
+// ------------------------------------------------------- Others
+# else
+
+# include <thread>
+
+namespace mozilla::baseprofiler::detail {
+using ThreadIdType = std::thread::id;
+} // namespace mozilla::baseprofiler::detail
+
+# endif
+#endif // End of non-XP_WIN.
+
+#include <stdint.h>
+#include <string.h>
+#include <type_traits>
+
+namespace mozilla::baseprofiler {
+
+// Trivially-copyable class containing a process id. It may be left unspecified.
+class BaseProfilerProcessId {
+ public:
+ using NativeType = detail::ProcessIdType;
+
+ using NumberType =
+ std::conditional_t<(sizeof(NativeType) <= 4), uint32_t, uint64_t>;
+ static_assert(sizeof(NativeType) <= sizeof(NumberType));
+
+ // Unspecified process id.
+ constexpr BaseProfilerProcessId() = default;
+
+ [[nodiscard]] constexpr bool IsSpecified() const {
+ return mProcessId != scUnspecified;
+ }
+
+ // Construct from a native type.
+ [[nodiscard]] static BaseProfilerProcessId FromNativeId(
+ const NativeType& aNativeProcessId) {
+ BaseProfilerProcessId id;
+ // Copy the native id's bits; bytes beyond sizeof(NativeType) stay zero.
+ static_assert(std::is_trivially_copyable_v<NativeType>);
+ memcpy(&id.mProcessId, &aNativeProcessId, sizeof(NativeType));
+ return id;
+ }
+
+ // Get the process id as a number, which may be unspecified.
+ // This should only be used for serialization or logging.
+ [[nodiscard]] constexpr NumberType ToNumber() const { return mProcessId; }
+
+ // BaseProfilerProcessId from given number (which may be unspecified).
+ [[nodiscard]] constexpr static BaseProfilerProcessId FromNumber(
+ const NumberType& aProcessId) {
+ BaseProfilerProcessId id;
+ id.mProcessId = aProcessId;
+ return id;
+ }
+
+ [[nodiscard]] constexpr bool operator==(
+ const BaseProfilerProcessId& aOther) const {
+ return mProcessId == aOther.mProcessId;
+ }
+ [[nodiscard]] constexpr bool operator!=(
+ const BaseProfilerProcessId& aOther) const {
+ return mProcessId != aOther.mProcessId;
+ }
+
+ private:
+ static constexpr NumberType scUnspecified = 0;  // 0 doubles as the "unspecified" marker.
+ NumberType mProcessId = scUnspecified;
+};
+
+// Check traits. These should satisfy usage in std::atomic.
+static_assert(std::is_trivially_copyable_v<BaseProfilerProcessId>);
+static_assert(std::is_copy_constructible_v<BaseProfilerProcessId>);
+static_assert(std::is_move_constructible_v<BaseProfilerProcessId>);
+static_assert(std::is_copy_assignable_v<BaseProfilerProcessId>);
+static_assert(std::is_move_assignable_v<BaseProfilerProcessId>);
+
+// Trivially-copyable class containing a thread id. It may be left unspecified.
+class BaseProfilerThreadId {
+ public:
+ using NativeType = detail::ThreadIdType;
+
+ using NumberType =
+ std::conditional_t<(sizeof(NativeType) <= 4), uint32_t, uint64_t>;
+ static_assert(sizeof(NativeType) <= sizeof(NumberType));
+
+ // Unspecified thread id.
+ constexpr BaseProfilerThreadId() = default;
+
+ [[nodiscard]] constexpr bool IsSpecified() const {
+ return mThreadId != scUnspecified;
+ }
+
+ // Construct from a native type.
+ [[nodiscard]] static BaseProfilerThreadId FromNativeId(
+ const NativeType& aNativeThreadId) {
+ BaseProfilerThreadId id;
+ // Copy the native id's bits; bytes beyond sizeof(NativeType) stay zero.
+ static_assert(std::is_trivially_copyable_v<NativeType>);
+ memcpy(&id.mThreadId, &aNativeThreadId, sizeof(NativeType));
+ return id;
+ }
+
+ // Get the thread id as a number, which may be unspecified.
+ // This should only be used for serialization or logging.
+ [[nodiscard]] constexpr NumberType ToNumber() const { return mThreadId; }
+
+ // BaseProfilerThreadId from given number (which may be unspecified).
+ [[nodiscard]] constexpr static BaseProfilerThreadId FromNumber(
+ const NumberType& aThreadId) {
+ BaseProfilerThreadId id;
+ id.mThreadId = aThreadId;
+ return id;
+ }
+
+ [[nodiscard]] constexpr bool operator==(
+ const BaseProfilerThreadId& aOther) const {
+ return mThreadId == aOther.mThreadId;
+ }
+ [[nodiscard]] constexpr bool operator!=(
+ const BaseProfilerThreadId& aOther) const {
+ return mThreadId != aOther.mThreadId;
+ }
+
+ private:
+ static constexpr NumberType scUnspecified = 0;  // 0 doubles as the "unspecified" marker.
+ NumberType mThreadId = scUnspecified;
+};
+
+// Check traits. These should satisfy usage in std::atomic.
+static_assert(std::is_trivially_copyable_v<BaseProfilerThreadId>);
+static_assert(std::is_copy_constructible_v<BaseProfilerThreadId>);
+static_assert(std::is_move_constructible_v<BaseProfilerThreadId>);
+static_assert(std::is_copy_assignable_v<BaseProfilerThreadId>);
+static_assert(std::is_move_assignable_v<BaseProfilerThreadId>);
+
+} // namespace mozilla::baseprofiler
+
+#include "mozilla/Types.h"
+
+namespace mozilla::baseprofiler {
+
+// Get the current process's ID.
+[[nodiscard]] MFBT_API BaseProfilerProcessId profiler_current_process_id();
+
+// Get the current thread's ID.
+[[nodiscard]] MFBT_API BaseProfilerThreadId profiler_current_thread_id();
+
+// Must be called at least once from the main thread, before any other main-
+// thread id function.
+MFBT_API void profiler_init_main_thread_id();
+
+[[nodiscard]] MFBT_API BaseProfilerThreadId profiler_main_thread_id();
+
+[[nodiscard]] MFBT_API bool profiler_is_main_thread();
+
+} // namespace mozilla::baseprofiler
+
+#endif // BaseProfilerUtils_h
diff --git a/mozglue/baseprofiler/public/BaseProfilingCategory.h b/mozglue/baseprofiler/public/BaseProfilingCategory.h
new file mode 100644
index 0000000000..2e80950966
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingCategory.h
@@ -0,0 +1,68 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingCategory_h
+#define BaseProfilingCategory_h
+
+#include "mozilla/Types.h"
+
+#include <cstdint>
+
+#include "ProfilingCategoryList.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// An enum that lists all possible category pairs in one list.
+// This is the enum that is used in profiler stack labels. Having one list that
+// includes subcategories from all categories in one list allows assigning the
+// category pair to a stack label with just one number.
+#define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) name,
+#define CATEGORY_ENUM_END_CATEGORY
+enum class ProfilingCategoryPair : uint32_t {
+ MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+ CATEGORY_ENUM_SUBCATEGORY,
+ CATEGORY_ENUM_END_CATEGORY)
+ COUNT,  // Comes after the list-generated enumerators, so presumably equals their number.
+ LAST = COUNT - 1,  // Value of the enumerator just before COUNT.
+};
+#undef CATEGORY_ENUM_BEGIN_CATEGORY
+#undef CATEGORY_ENUM_SUBCATEGORY
+#undef CATEGORY_ENUM_END_CATEGORY
+
+// An enum that lists just the categories without their subcategories.
+#define SUPERCATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color) name,
+#define SUPERCATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString)
+#define SUPERCATEGORY_ENUM_END_CATEGORY
+enum class ProfilingCategory : uint32_t {
+ MOZ_PROFILING_CATEGORY_LIST(SUPERCATEGORY_ENUM_BEGIN_CATEGORY,
+ SUPERCATEGORY_ENUM_SUBCATEGORY,
+ SUPERCATEGORY_ENUM_END_CATEGORY)
+ COUNT,  // Comes after the list-generated enumerators, so presumably equals their number.
+ LAST = COUNT - 1,  // Value of the enumerator just before COUNT.
+};
+#undef SUPERCATEGORY_ENUM_BEGIN_CATEGORY
+#undef SUPERCATEGORY_ENUM_SUBCATEGORY
+#undef SUPERCATEGORY_ENUM_END_CATEGORY
+
+// clang-format on
+
+struct ProfilingCategoryPairInfo {
+ ProfilingCategory mCategory;  // Presumably the category this pair belongs to -- TODO confirm.
+ uint32_t mSubcategoryIndex;  // Presumably the pair's index within mCategory -- TODO confirm.
+ const char* mLabel;  // Raw C string; ownership is not visible from this header.
+};
+
+MFBT_API const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo(
+ ProfilingCategoryPair aCategoryPair);
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif /* BaseProfilingCategory_h */
diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h
new file mode 100644
index 0000000000..76f8d6c801
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingStack.h
@@ -0,0 +1,518 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingStack_h
+#define BaseProfilingStack_h
+
+#ifndef MOZ_GECKO_PROFILER
+# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Atomics.h"
+
+#include <stdint.h>
+
+// This file defines the classes ProfilingStack and ProfilingStackFrame.
+// The ProfilingStack manages an array of ProfilingStackFrames.
+// It keeps track of the "label stack" and the JS interpreter stack.
+// The two stack types are interleaved.
+//
+// Usage:
+//
+// ProfilingStack* profilingStack = ...;
+//
+// // For label frames:
+// profilingStack->pushLabelFrame(...);
+// // Execute some code. When finished, pop the frame:
+// profilingStack->pop();
+//
+// // For JS stack frames:
+// profilingStack->pushJSFrame(...);
+// // Execute some code. When finished, pop the frame:
+// profilingStack->pop();
+//
+//
+// Concurrency considerations
+//
+// A thread's profiling stack (and the frames inside it) is only modified by
+// that thread. However, the profiling stack can be *read* by a different
+// thread, the sampler thread: Whenever the profiler wants to sample a given
+// thread A, the following happens:
+// (1) Thread A is suspended.
+// (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
+// including all ProfilingStackFrames that are currently in that stack
+// (profilingStack->frames[0..profilingStack->stackSize()]).
+// (3) Thread A is resumed.
+//
+// Thread suspension is achieved using platform-specific APIs; refer to each
+// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
+// platform-*.cpp for details.
+//
+// When the thread is suspended, the values in profilingStack->stackPointer and
+// in the stack frame range
+// profilingStack->frames[0..profilingStack->stackPointer] need to be in a
+// consistent state, so that thread S does not read partially-constructed stack
+// frames. More specifically, we have two requirements:
+// (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
+// data needs to be put in place *before* the stackPointer is incremented,
+// and the compiler + CPU need to know that this order matters.
+// (2) When popping a frame from the stack and then preparing the
+// ProfilingStackFrame data for the next frame that is about to be pushed,
+// the decrement of the stackPointer in pop() needs to happen *before* the
+// ProfilingStackFrame for the new frame is being populated, and the
+// compiler + CPU need to know that this order matters.
+//
+// We can express the relevance of these orderings in multiple ways.
+// Option A is to make stackPointer an atomic with SequentiallyConsistent
+// memory ordering. This would ensure that no writes in thread A would be
+// reordered across any writes to stackPointer, which satisfies requirements
+// (1) and (2) at the same time. Option A is the simplest.
+// Option B is to use ReleaseAcquire memory ordering both for writes to
+// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
+// ensure that all writes that happened *before this write in program order* are
+// not reordered to happen after this write. ReleaseAcquire ordering places no
+// requirements on the ordering of writes that happen *after* this write in
+// program order.
+// Using release-stores for writes to stackPointer expresses requirement (1),
+// and using release-stores for writes to the ProfilingStackFrame fields
+// expresses requirement (2).
+//
+// Option B is more complicated than option A, but has much better performance
+// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
+// from option A to option B reduced the overhead of pushing+popping a
+// ProfilingStackFrame by 10 nanoseconds.
+// On x86/64, release-stores require no explicit hardware barriers or lock
+// instructions.
+// On ARM/64, option B may be slower than option A, because the compiler will
+// generate hardware barriers for every single release-store instead of just
+// for the writes to stackPointer. However, the actual performance impact of
+// this has not yet been measured on ARM, so we're currently using option B
+// everywhere. This is something that we may want to change in the future once
+// we've done measurements.
+
+namespace mozilla {
+namespace baseprofiler {
+
+// A call stack can be specified to the JS engine such that all JS entry/exits
+// to functions push/pop a stack frame to/from the specified stack.
+//
+// For more detailed information, see vm/GeckoProfiler.h.
+//
+class ProfilingStackFrame {
+ // A ProfilingStackFrame represents either a label frame or a JS frame.
+
+ // WARNING WARNING WARNING
+ //
+ // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
+ // that writes to these fields are release-writes, which ensures that
+ // earlier writes in this thread don't get reordered after the writes to
+ // these fields. In particular, the decrement of the stack pointer in
+ // ProfilingStack::pop() is a write that *must* happen before the values in
+ // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
+ // see an inconsistent state where the stack pointer still points to a
+ // ProfilingStackFrame which has already been popped off the stack and whose
+ // fields have now been partially repopulated with new values.
+ // See the "Concurrency considerations" paragraph at the top of this file
+ // for more details.
+
+ // Descriptive label for this stack frame. Must be a static string! Can be
+ // an empty string, but not a null pointer.
+ Atomic<const char*, ReleaseAcquire> label_;
+
+ // An additional descriptive string of this frame which is combined with
+ // |label_| in profiler output. Need not be (and usually isn't) static. Can
+ // be null.
+ Atomic<const char*, ReleaseAcquire> dynamicString_;
+
+ // Stack pointer for non-JS stack frames, the script pointer otherwise.
+ Atomic<void*, ReleaseAcquire> spOrScript;
+
+ // ID of the JS Realm for JS stack frames.
+ // Must not be used on non-JS frames; it'll contain either the default 0,
+ // or a leftover value from a previous JS stack frame that was using this
+ // ProfilingStackFrame object.
+ mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
+
+ // The bytecode offset for JS stack frames.
+ // Must not be used on non-JS frames; it'll contain either the default 0,
+ // or a leftover value from a previous JS stack frame that was using this
+ // ProfilingStackFrame object.
+ Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_;
+
+ // Bits 0...15 hold the Flags. Bits 16...31 hold the category pair.
+ Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_;
+
+ public:
+ ProfilingStackFrame() = default;
+ // Field-by-field copy: each value is read out of |other| and then stored
+ // into the matching member, in the same order as the fields are listed
+ // here. Note that label() may return the category pair's label.
+ ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
+ label_ = other.label();
+ dynamicString_ = other.dynamicString();
+ spOrScript = static_cast<void*>(other.spOrScript);
+ pcOffsetIfJS_ = int32_t(other.pcOffsetIfJS_);
+ // realmID_ is unsigned 64-bit; copy it as uint64_t rather than int64_t.
+ realmID_ = uint64_t(other.realmID_);
+ flagsAndCategoryPair_ = uint32_t(other.flagsAndCategoryPair_);
+ return *this;
+ }
+
+ // The low FLAGS_BITCOUNT (16) bits hold flags; the category pair sits above.
+ enum class Flags : uint32_t {
+ // The first three flags describe the kind of the frame and are
+ // mutually exclusive. (We still give them individual bits for
+ // simplicity.)
+
+ // A regular label frame. These usually come from AutoProfilerLabel.
+ IS_LABEL_FRAME = 1 << 0,
+
+ // A special frame indicating the start of a run of JS profiling stack
+ // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp
+ // field. These frames are needed to get correct ordering between JS
+ // and LABEL frames because JS frames don't carry sp information.
+ // SP is short for "stack pointer".
+ IS_SP_MARKER_FRAME = 1 << 1,
+
+ // A JS frame.
+ IS_JS_FRAME = 1 << 2,
+
+ // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME
+ // frames can have this flag set and unset during their lifetime.
+ // JS_OSR frames are ignored.
+ JS_OSR = 1 << 3,
+
+ // The next three are mutually exclusive.
+ // By default, for profiling stack frames that have both a label and a
+ // dynamic string, the two strings are combined into one string of the
+ // form "<label> <dynamicString>" during JSON serialization. The
+ // following flags can be used to change this preset.
+ STRING_TEMPLATE_METHOD = 1 << 4, // "<label>.<dynamicString>"
+ STRING_TEMPLATE_GETTER = 1 << 5, // "get <label>.<dynamicString>"
+ STRING_TEMPLATE_SETTER = 1 << 6, // "set <label>.<dynamicString>"
+
+ // If set, causes this stack frame to be marked as "relevantForJS" in
+ // the profile JSON, which will make it show up in the "JS only" call
+ // tree view.
+ RELEVANT_FOR_JS = 1 << 7,
+
+ // If set, causes the label on this ProfilingStackFrame to be ignored
+ // and to be replaced by the subcategory's label.
+ LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8,
+
+ // Frame dynamic string does not contain user data.
+ NONSENSITIVE = 1 << 9,
+
+ // A JS Baseline Interpreter frame.
+ IS_BLINTERP_FRAME = 1 << 10,
+
+ FLAGS_BITCOUNT = 16,  // Shift applied to the category pair when packing.
+ FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1  // Selects the low 16 bits.
+ };
+
+ // The category pair is stored above the flag bits, so LAST must fit there.
+ static_assert(uint32_t(ProfilingCategoryPair::LAST) <=
+ (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
+ "Too many category pairs to fit into u32 together with the "
+ "reserved bits for the flags");
+
+ bool isLabelFrame() const {  // IS_LABEL_FRAME bit; see initLabelFrame().
+ const uint32_t packed = flagsAndCategoryPair_;
+ return (packed & uint32_t(Flags::IS_LABEL_FRAME)) != 0;
+ }
+ bool isSpMarkerFrame() const {  // IS_SP_MARKER_FRAME bit.
+ const uint32_t packed = flagsAndCategoryPair_;
+ return (packed & uint32_t(Flags::IS_SP_MARKER_FRAME)) != 0;
+ }
+ bool isJsFrame() const {  // IS_JS_FRAME bit; see initJsFrame().
+ const uint32_t packed = flagsAndCategoryPair_;
+ return (packed & uint32_t(Flags::IS_JS_FRAME)) != 0;
+ }
+ bool isOSRFrame() const {  // JS_OSR bit; see setIsOSRFrame().
+ const uint32_t packed = flagsAndCategoryPair_;
+ return (packed & uint32_t(Flags::JS_OSR)) != 0;
+ }
+
+ // Sets (isOSR == true) or clears (isOSR == false) the JS_OSR bit and leaves
+ // all other bits of the packed word as they were.
+ void setIsOSRFrame(bool isOSR) {
+ const uint32_t osrBit = uint32_t(Flags::JS_OSR);
+ // Exactly one load of the atomic here, followed by one store on the next
+ // statement.
+ const uint32_t packed = flagsAndCategoryPair_;
+ flagsAndCategoryPair_ = isOSR ? (packed | osrBit) : (packed & ~osrBit);
+ }
+
+ // Returns the label to show for this frame: the category pair's label when
+ // LABEL_DETERMINED_BY_CATEGORY_PAIR is set, the stored label_ otherwise.
+ const char* label() const {
+ const uint32_t packed = flagsAndCategoryPair_;
+ if (!(packed & uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR))) {
+ return label_;
+ }
+ const uint32_t pair = packed >> uint32_t(Flags::FLAGS_BITCOUNT);
+ return GetProfilingCategoryPairInfo(ProfilingCategoryPair(pair)).mLabel;
+ }
+
+ const char* dynamicString() const { return dynamicString_; }
+
+ void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
+ ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {
+ label_ = aLabel;
+ dynamicString_ = aDynamicString;
+ spOrScript = sp;
+ // pcOffsetIfJS_ is left untouched; it must not be used on label frames.
+ const uint32_t pairBits = uint32_t(aCategoryPair)
+ << uint32_t(Flags::FLAGS_BITCOUNT);
+ flagsAndCategoryPair_ = uint32_t(Flags::IS_LABEL_FRAME) | pairBits | aFlags;
+ MOZ_ASSERT(isLabelFrame());
+ }
+
+ void initSpMarkerFrame(void* sp) {
+ label_ = "";
+ dynamicString_ = nullptr;
+ spOrScript = sp;
+ // pcOffsetIfJS_ is left untouched; it must not be used on sp marker frames.
+ const uint32_t pairBits = uint32_t(ProfilingCategoryPair::OTHER)
+ << uint32_t(Flags::FLAGS_BITCOUNT);
+ flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) | pairBits;
+ MOZ_ASSERT(isSpMarkerFrame());
+ }
+
+ void initJsFrame(const char* aLabel, const char* aDynamicString,
+ void* /* JSScript* */ aScript, int32_t aOffset,
+ uint64_t aRealmID) {
+ label_ = aLabel;
+ dynamicString_ = aDynamicString;
+ spOrScript = aScript;  // Holds the script pointer for JS frames.
+ pcOffsetIfJS_ = aOffset;
+ realmID_ = aRealmID;
+ const uint32_t pairBits = uint32_t(ProfilingCategoryPair::JS)
+ << uint32_t(Flags::FLAGS_BITCOUNT);
+ flagsAndCategoryPair_ = uint32_t(Flags::IS_JS_FRAME) | pairBits;
+ MOZ_ASSERT(isJsFrame());
+ }
+
+ uint32_t flags() const {  // Flag bits only, category pair stripped.
+ const uint32_t packed = flagsAndCategoryPair_;
+ return packed & uint32_t(Flags::FLAGS_MASK);
+ }
+ ProfilingCategoryPair categoryPair() const {  // Bits above the flags.
+ const uint32_t packed = flagsAndCategoryPair_;
+ return ProfilingCategoryPair(packed >> uint32_t(Flags::FLAGS_BITCOUNT));
+ }
+
+ uint64_t realmID() const { return realmID_; }
+ // Returns spOrScript as a stack address; asserts this is not a JS frame.
+ void* stackAddress() const {
+ MOZ_ASSERT(!isJsFrame());
+ return spOrScript;
+ }
+
+ // Note that the pointer returned might be invalid.
+ void* rawScript() const {
+ MOZ_ASSERT(isJsFrame());
+ return spOrScript;
+ }
+ void setRawScript(void* aScript) {
+ MOZ_ASSERT(isJsFrame());
+ spOrScript = aScript;
+ }
+ // Bytecode offset accessors; both assert that this is a JS frame.
+ int32_t pcOffset() const {
+ MOZ_ASSERT(isJsFrame());
+ return pcOffsetIfJS_;
+ }
+
+ void setPCOffset(int32_t aOffset) {
+ MOZ_ASSERT(isJsFrame());
+ pcOffsetIfJS_ = aOffset;
+ }
+
+ // The offset of a pc into a script's code can actually be 0, so to
+ // signify a nullptr pc, use a -1 index. NOTE(review): this class has no
+ // pc()/setPC(); presumably users of pcOffset()/setPCOffset() check for it.
+ static const int32_t NullPCOffset = -1;
+};
+
+// Each thread has its own ProfilingStack. That thread modifies the
+// ProfilingStack, pushing and popping elements as necessary.
+//
+// The ProfilingStack is also read periodically by the profiler's sampler
+// thread. This happens only when the thread that owns the ProfilingStack is
+// suspended. So there are no genuine parallel accesses.
+//
+// However, it is possible for pushing/popping to be interrupted by a periodic
+// sample. Because of this, we need pushing/popping to be effectively atomic.
+//
+// - When pushing a new frame, we increment the stack pointer -- making the new
+// frame visible to the sampler thread -- only after the new frame has been
+// fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
+// the increment is a release-store, which ensures that this store is not
+// reordered before the writes of the frame.
+//
+// - When popping an old frame, the only operation is the decrementing of the
+// stack pointer, which is obviously atomic.
+//
+class ProfilingStack final {
+ public:
+ ProfilingStack() = default;
+
+ MFBT_API ~ProfilingStack();
+
+ void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
+ ProfilingCategoryPair categoryPair, uint32_t flags = 0) {
+ // This thread is the only one that ever changes the value of
+ // stackPointer.
+ // Store the value of the atomic in a non-atomic local variable so that
+ // the compiler won't generate two separate loads from the atomic for
+ // the size check and the frames[] array indexing operation.
+ uint32_t stackPointerVal = stackPointer;
+
+ if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
+ ensureCapacitySlow();
+ }
+ frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
+ categoryPair, flags);
+
+ // This must happen at the end! The compiler will not reorder this
+ // update because stackPointer is Atomic<..., ReleaseAcquire>, so none of
+ // the writes above will be reordered below the stackPointer store.
+ // Do the read and the write as two separate statements, in order to
+ // make it clear that we don't need an atomic increment, which would be
+ // more expensive on x86 than the separate operations done here.
+ // However, don't use stackPointerVal here; instead, allow the compiler
+ // to turn this store into a non-atomic increment instruction which
+ // takes up less code size.
+ stackPointer = stackPointer + 1;
+ }
+
+ void pushSpMarkerFrame(void* sp) {
+ // Load the atomic once into a plain local for the check and the indexing.
+ const uint32_t top = stackPointer;
+ if (MOZ_UNLIKELY(top >= capacity)) {
+ ensureCapacitySlow();
+ }
+ frames[top].initSpMarkerFrame(sp);
+
+ // This must happen at the end, see the comment in pushLabelFrame.
+ stackPointer = top + 1;
+ }
+
+ void pushJsOffsetFrame(const char* label, const char* dynamicString,
+ void* script, int32_t offset, uint64_t aRealmID) {
+ // This thread is the only one that ever changes the value of
+ // stackPointer, so the atomic is loaded once into a plain local.
+ const uint32_t top = stackPointer;
+
+ if (MOZ_UNLIKELY(top >= capacity)) {
+ ensureCapacitySlow();
+ }
+ ProfilingStackFrame& slot = frames[top];
+ slot.initJsFrame(label, dynamicString, script, offset, aRealmID);
+
+ // This must happen at the end, see the comment in pushLabelFrame.
+ stackPointer = stackPointer + 1;
+ }
+
+ void pop() {
+ MOZ_ASSERT(stackPointer > 0);
+ // The load and the store below are deliberately separate statements: an
+ // atomic decrement is not needed, and would be more expensive on x86
+ // than these two operations.
+ // This thread is the only one that ever changes the value of
+ // stackPointer.
+ const uint32_t top = stackPointer;
+ stackPointer = top - 1;
+ }
+
+ uint32_t stackSize() const { return stackPointer; }  // Current stackPointer.
+ uint32_t stackCapacity() const { return capacity; }  // Current capacity.
+
+ private:
+ // Out of line path for expanding the buffer, since otherwise this would get
+ // inlined in every DOM WebIDL call.
+ MFBT_API MOZ_COLD void ensureCapacitySlow();
+
+ // No copying.
+ ProfilingStack(const ProfilingStack&) = delete;
+ void operator=(const ProfilingStack&) = delete;
+
+ // No moving either.
+ ProfilingStack(ProfilingStack&&) = delete;
+ void operator=(ProfilingStack&&) = delete;
+
+ uint32_t capacity = 0;
+
+ public:
+ // The pointer to the stack frames, this is read from the profiler thread and
+ // written from the current thread.
+ //
+ // This is effectively a unique pointer.
+ Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr};
+
+ // This may exceed the capacity, so instead use the stackSize() method to
+ // determine the number of valid frames in `frames`. NOTE(review): stackSize()
+ // returns this value unclamped -- confirm it can never exceed the capacity.
+ // When this is less than stackCapacity(), it refers to the first free frame
+ // past the top of the in-use stack (frames[stackPointer - 1] is the top).
+ //
+ // WARNING WARNING WARNING
+ //
+ // This is an atomic variable that uses ReleaseAcquire memory ordering.
+ // See the "Concurrency considerations" paragraph at the top of this file
+ // for more details.
+ Atomic<uint32_t, ReleaseAcquire> stackPointer{0};
+};
+
+class AutoGeckoProfilerEntry;
+class GeckoProfilerEntryMarker;
+class GeckoProfilerBaselineOSRMarker;
+
+class GeckoProfilerThread {
+ friend class AutoGeckoProfilerEntry;
+ friend class GeckoProfilerEntryMarker;
+ friend class GeckoProfilerBaselineOSRMarker;
+ // The stack used by this object; infraInstalled() tests it against null.
+ ProfilingStack* profilingStack_;
+
+ // Same as profilingStack_ if the profiler is currently active, otherwise
+ // null.
+ ProfilingStack* profilingStackIfEnabled_;
+
+ public:
+ MFBT_API GeckoProfilerThread();
+ // NOTE(review): stack() dereferences profilingStack_ without asserting.
+ uint32_t stackPointer() {
+ MOZ_ASSERT(infraInstalled());
+ return profilingStack_->stackPointer;
+ }
+ ProfilingStackFrame* stack() { return profilingStack_->frames; }
+ ProfilingStack* getProfilingStack() { return profilingStack_; }
+ ProfilingStack* getProfilingStackIfEnabled() {
+ return profilingStackIfEnabled_;
+ }
+
+ /*
+ * True if the profiler infrastructure is set up. Should be true in builds
+ * that include profiler support except during early startup or late
+ * shutdown. Unrelated to the presence of the Gecko Profiler addon.
+ */
+ bool infraInstalled() { return profilingStack_ != nullptr; }
+
+ MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
+ void enable(bool enable) {
+ profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr;
+ }
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif /* BaseProfilingStack_h */
diff --git a/mozglue/baseprofiler/public/FailureLatch.h b/mozglue/baseprofiler/public/FailureLatch.h
new file mode 100644
index 0000000000..10205e009b
--- /dev/null
+++ b/mozglue/baseprofiler/public/FailureLatch.h
@@ -0,0 +1,217 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains an interface `FailureLatch`, and some implementation
+// helpers that may be used across a range of classes and functions to handle
+// failures at any point during a process, and share that failure state so that
+// the process may gracefully stop quickly and report the first error.
+//
+// It could be thought as a replacement for C++ exceptions, but it's less strong
+// (cancellations may be delayed).
+// Now, if possible, mozilla::Result may be a better option as C++ exceptions
+// replacement, as it is more visible in all affected functions.
+// Consider FailureLatch if failures may happen in different places, but where
+// `return`ing this potential failure from all functions would be too arduous.
+
+#ifndef mozilla_FailureLatch_h
+#define mozilla_FailureLatch_h
+
+#include <mozilla/Assertions.h>
+
+#include <string>
+
+namespace mozilla {
+
+// ----------------------------------------------------------------------------
+// Main interface
+// ----------------------------------------------------------------------------
+
+// Interface handling a failure latch (starting in a successful state, the first
+// failure gets recorded, subsequent failures are ignored.)
+class FailureLatch {
+ public:
+ virtual ~FailureLatch() = default;
+
+ // Whether this latch can ever fail. (This may influence how some code deals
+ // with failures, e.g., if infallible, OOMs should assert&crash.)
+ [[nodiscard]] virtual bool Fallible() const = 0;
+
+ // Put the latch in its failed state because of an external cause.
+ // Only the first call records its reason; subsequent calls are ignored.
+ virtual void SetFailure(std::string aReason) = 0;
+
+ // True once any failure has been recorded.
+ [[nodiscard]] virtual bool Failed() const = 0;
+
+ // The first failure string; may be null if not failed yet.
+ [[nodiscard]] virtual const char* GetFailure() const = 0;
+
+ // Retrieve the one source FailureLatch. It could reference `*this`!
+ // Dependent proxy FailureLatch'es may use this to find where to redirect
+ // calls.
+ [[nodiscard]] virtual const FailureLatch& SourceFailureLatch() const = 0;
+ [[nodiscard]] virtual FailureLatch& SourceFailureLatch() = 0;
+
+ // Non-virtual helpers.
+
+ // Adopt aOther's failure, if it has one and this latch has not failed yet.
+ void SetFailureFrom(const FailureLatch& aOther) {
+ if (!Failed()) {
+ const char* const otherReason = aOther.GetFailure();
+ if (otherReason != nullptr) {
+ SetFailure(otherReason);
+ }
+ }
+ }
+};
+
+// ----------------------------------------------------------------------------
+// Concrete implementations
+// ----------------------------------------------------------------------------
+
+// Concrete infallible FailureLatch class.
+// Any `SetFailure` leads to an assert-crash, so the final runtime result can
+// always be assumed to be successful.
+class FailureLatchInfallibleSource final : public FailureLatch {
+ public:
+ [[nodiscard]] bool Fallible() const final { return false; }
+ // aReason is not recorded: every call hits the release assertion below.
+ void SetFailure(std::string aReason) final {
+ MOZ_RELEASE_ASSERT(false,
+ "SetFailure in infallible FailureLatchInfallibleSource");
+ }
+
+ [[nodiscard]] bool Failed() const final { return false; }
+
+ [[nodiscard]] const char* GetFailure() const final { return nullptr; }
+
+ [[nodiscard]] const ::mozilla::FailureLatch& SourceFailureLatch()
+ const final {
+ return *this;
+ }
+
+ [[nodiscard]] ::mozilla::FailureLatch& SourceFailureLatch() final {
+ return *this;
+ }
+
+ // Singleton FailureLatchInfallibleSource that may be used as default
+ // FailureLatch proxy.
+ static FailureLatchInfallibleSource& Singleton() {
+ static FailureLatchInfallibleSource singleton;
+ return singleton;
+ }
+};
+
+// Concrete FailureLatch class, intended to be instantiated as an object shared
+// between classes and functions that are part of a long operation, so that
+// failures can happen anywhere and be visible everywhere.
+// Not thread-safe.
+class FailureLatchSource final : public FailureLatch {
+ public:
+ [[nodiscard]] bool Fallible() const final { return true; }
+
+ void SetFailure(std::string aReason) final {
+ if (mFailed) {
+ return;  // Already latched: the first reason is kept.
+ }
+ mFailed = true;
+ mReason = std::move(aReason);
+ }
+
+ [[nodiscard]] bool Failed() const final { return mFailed; }
+
+ [[nodiscard]] const char* GetFailure() const final {
+ return mFailed ? mReason.c_str() : nullptr;
+ }
+
+ [[nodiscard]] const FailureLatch& SourceFailureLatch() const final {
+ return *this;
+ }
+ [[nodiscard]] FailureLatch& SourceFailureLatch() final { return *this; }
+
+ private:
+ bool mFailed = false;
+ std::string mReason;
+};
+
+// ----------------------------------------------------------------------------
+// Helper macros, to be used in FailureLatch-derived classes
+// ----------------------------------------------------------------------------
+
+// Classes deriving from FailureLatch can use this to implement all six virtual
+// functions as `final` overrides that forward to FAILURELATCH_REF.
+#define FAILURELATCH_IMPL_PROXY(FAILURELATCH_REF) \
+ [[nodiscard]] bool Fallible() const final { \
+ return static_cast<const ::mozilla::FailureLatch&>(FAILURELATCH_REF) \
+ .Fallible(); \
+ } \
+ void SetFailure(std::string aReason) final { \
+ static_cast<::mozilla::FailureLatch&>(FAILURELATCH_REF) \
+ .SetFailure(std::move(aReason)); \
+ } \
+ [[nodiscard]] bool Failed() const final { \
+ return static_cast<const ::mozilla::FailureLatch&>(FAILURELATCH_REF) \
+ .Failed(); \
+ } \
+ [[nodiscard]] const char* GetFailure() const final { \
+ return static_cast<const ::mozilla::FailureLatch&>(FAILURELATCH_REF) \
+ .GetFailure(); \
+ } \
+ [[nodiscard]] const FailureLatch& SourceFailureLatch() const final { \
+ return static_cast<const ::mozilla::FailureLatch&>(FAILURELATCH_REF) \
+ .SourceFailureLatch(); \
+ } \
+ [[nodiscard]] FailureLatch& SourceFailureLatch() final { \
+ return static_cast<::mozilla::FailureLatch&>(FAILURELATCH_REF) \
+ .SourceFailureLatch(); \
+ }
+
+// Classes deriving from FailureLatch can use this to forward virtual calls to
+// another FailureLatch through a pointer, unless it's null in which case act
+// like an infallible FailureLatch. (FAILURELATCH_PTR may be any expression.)
+#define FAILURELATCH_IMPL_PROXY_OR_INFALLIBLE(FAILURELATCH_PTR, CLASS_NAME) \
+ [[nodiscard]] bool Fallible() const final { \
+ return (FAILURELATCH_PTR) \
+ ? static_cast<const ::mozilla::FailureLatch*>(FAILURELATCH_PTR) \
+ ->Fallible() \
+ : false; \
+ } \
+ void SetFailure(std::string aReason) final { \
+ if (FAILURELATCH_PTR) { \
+ static_cast<::mozilla::FailureLatch*>(FAILURELATCH_PTR) \
+ ->SetFailure(std::move(aReason)); \
+ } else { \
+ MOZ_RELEASE_ASSERT(false, "SetFailure in infallible " #CLASS_NAME); \
+ } \
+ } \
+ [[nodiscard]] bool Failed() const final { \
+ return (FAILURELATCH_PTR) \
+ ? static_cast<const ::mozilla::FailureLatch*>(FAILURELATCH_PTR) \
+ ->Failed() \
+ : false; \
+ } \
+ [[nodiscard]] const char* GetFailure() const final { \
+ return (FAILURELATCH_PTR) \
+ ? static_cast<const ::mozilla::FailureLatch*>(FAILURELATCH_PTR) \
+ ->GetFailure() \
+ : nullptr; \
+ } \
+ [[nodiscard]] const FailureLatch& SourceFailureLatch() const final { \
+ return (FAILURELATCH_PTR) \
+ ? static_cast<const ::mozilla::FailureLatch*>(FAILURELATCH_PTR) \
+ ->SourceFailureLatch() \
+ : ::mozilla::FailureLatchInfallibleSource::Singleton(); \
+ } \
+ [[nodiscard]] FailureLatch& SourceFailureLatch() final { \
+ return (FAILURELATCH_PTR) \
+ ? static_cast<::mozilla::FailureLatch*>(FAILURELATCH_PTR) \
+ ->SourceFailureLatch() \
+ : ::mozilla::FailureLatchInfallibleSource::Singleton(); \
+ }
+
+} // namespace mozilla
+
+#endif /* mozilla_FailureLatch_h */
diff --git a/mozglue/baseprofiler/public/ModuloBuffer.h b/mozglue/baseprofiler/public/ModuloBuffer.h
new file mode 100644
index 0000000000..80e765279e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ModuloBuffer.h
@@ -0,0 +1,618 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ModuloBuffer_h
+#define ModuloBuffer_h
+
+#include "mozilla/leb128iterator.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/UniquePtr.h"
+
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+namespace mozilla {
+
+// The ModuloBuffer class is a circular buffer that holds raw byte values, with
+// data-read/write helpers.
+//
+// OffsetT: Type of the internal offset into the buffer of bytes, it should be
+// large enough to access all bytes of the buffer. It will also be used as
+// Length (in bytes) of the buffer and of any subset. Default: uint32_t.
+// IndexT: Type of the external index, it should be large enough that overflows
+// should not happen during the lifetime of the ModuloBuffer. Default: uint64_t.
+//
+// The basic usage is to create an iterator-like object with `ReaderAt(Index)`
+// or `WriterAt(Index)`, and use it to read/write data blobs. Iterators
+// automatically manage the wrap-around (through "Modulo", which is effectively
+// an AND-masking with the PowerOfTwo buffer size.)
+//
+// There is zero safety: No thread safety, no checks that iterators may be
+// overwriting data that's still to be read, etc. It's up to the caller to add
+// adequate checks.
+// The intended use is as an underlying buffer for a safer container.
+template <typename OffsetT = uint32_t, typename IndexT = uint64_t>
+class ModuloBuffer {
+ public:
+ using Byte = uint8_t;
+ static_assert(sizeof(Byte) == 1, "ModuloBuffer::Byte must be 1 byte");
+ using Offset = OffsetT;
+ static_assert(!std::numeric_limits<Offset>::is_signed,
+ "ModuloBuffer::Offset must be an unsigned integral type");
+ using Length = Offset;
+ using Index = IndexT;
+ static_assert(!std::numeric_limits<Index>::is_signed,
+ "ModuloBuffer::Index must be an unsigned integral type");
+ static_assert(sizeof(Index) >= sizeof(Offset),
+ "ModuloBuffer::Index size must >= Offset");
+
+ // Create and own a new buffer of the given length; its bytes start out uninitialized.
+ explicit ModuloBuffer(PowerOfTwo<Length> aLength)
+ : mMask(aLength.Mask()),
+ mBuffer(WrapNotNull(new Byte[aLength.Value()])),
+ mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {}  // Pairs with the `new[]` above.
+
+ // Take ownership of an existing buffer. Existing contents are ignored.
+ // Done by extracting the raw pointer from UniquePtr<Byte[]>, and adding
+ // an equivalent `delete[]` in `mBufferDeleter`.
+ ModuloBuffer(UniquePtr<Byte[]> aExistingBuffer, PowerOfTwo<Length> aLength)
+ : mMask(aLength.Mask()),
+ mBuffer(WrapNotNull(aExistingBuffer.release())),
+ mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {}
+
+ // Use an externally-owned buffer. Existing contents are ignored.
+ ModuloBuffer(Byte* aExternalBuffer, PowerOfTwo<Length> aLength)
+ : mMask(aLength.Mask()), mBuffer(WrapNotNull(aExternalBuffer)) {}  // No deleter is set, so the destructor leaves this buffer alone.
+
+ // Disallow copying, as we may uniquely own the resource.
+ ModuloBuffer(const ModuloBuffer& aOther) = delete;
+ ModuloBuffer& operator=(const ModuloBuffer& aOther) = delete;
+
+ // Allow move-construction. Stealing ownership if the original had it.
+ // This effectively prevents copy construction, and all assignments; needed so
+ // that a ModuloBuffer may be initialized from a separate construction.
+ // The moved-from ModuloBuffer still points at the resource but doesn't own
+ // it, so it won't try to free it; but accesses are not guaranteed, so it
+ // should not be used anymore.
+  ModuloBuffer(ModuloBuffer&& aOther)
+      : mMask(std::move(aOther.mMask)),
+        mBuffer(std::move(aOther.mBuffer)),
+        mBufferDeleter(std::move(aOther.mBufferDeleter)) {
+    // A moved-from `std::function` is valid but its value is unspecified, so
+    // it could theoretically still hold the original deleter. Clear it (a
+    // no-op when it is already empty) so that `aOther` can never free the
+    // resource that `this` now owns.
+    aOther.mBufferDeleter = nullptr;
+  }
+
+ // Disallow assignment, as we have some `const` members.
+ ModuloBuffer& operator=(ModuloBuffer&& aOther) = delete;
+
+ // Destructor, deletes the resource if we uniquely own it.
+ ~ModuloBuffer() {
+ if (mBufferDeleter) {
+ mBufferDeleter(mBuffer);
+ }
+ }
+
+ PowerOfTwo<Length> BufferLength() const {
+ return PowerOfTwo<Length>(mMask.MaskValue() + 1);
+ }
+
+ // Size of external resources.
+ // Note: `mBufferDeleter`'s potential external data (for its captures) is not
+ // included, as it's hidden in the `std::function` implementation.
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    // Without a buffer deleter, assume we don't own the data (it's probably on
+    // the stack, or should be reported by its owner) and report nothing;
+    // otherwise measure the buffer allocation.
+    return mBufferDeleter ? aMallocSizeOf(mBuffer) : 0;
+  }
+
+ size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+ return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+ }
+
+  ProfileBufferEntryReader EntryReaderFromTo(
+      Index aStart, Index aEnd, ProfileBufferBlockIndex aBlockIndex,
+      ProfileBufferBlockIndex aNextBlockIndex) const {
+    // Builds a reader over the bytes at indices aStart .. (aEnd - 1), using
+    // one span when they are contiguous in the buffer and two when they wrap.
+    using EntrySpan = Span<const ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      // Empty range, give back a default-constructed reader.
+      return ProfileBufferEntryReader{};
+    }
+    const auto bufferSize = mMask.MaskValue() + 1;
+    // Don't allow over-wrapping.
+    MOZ_ASSERT(aEnd - aStart <= bufferSize);
+    // Offset of the first byte, in 0 .. (buffer size - 1).
+    const Offset first = static_cast<Offset>(aStart) & mMask;
+    // Offset one past the last byte, in 1 .. (buffer size).
+    const Offset pastLast = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (first >= pastLast) {
+      // The range crosses the buffer threshold: one span runs until the end,
+      // and a second one restarts at the beginning of the buffer.
+      return ProfileBufferEntryReader{
+          EntrySpan(&mBuffer[first], bufferSize - first),
+          EntrySpan(&mBuffer[0], pastLast), aBlockIndex, aNextBlockIndex};
+    }
+    // The range doesn't cross the buffer threshold, a single span is enough.
+    return ProfileBufferEntryReader{EntrySpan(&mBuffer[first], pastLast - first),
+                                    aBlockIndex, aNextBlockIndex};
+  }
+
+ // Return an entry writer for the given range.
+  ProfileBufferEntryWriter EntryWriterFromTo(Index aStart, Index aEnd) const {
+    // The writer covers the bytes at indices aStart .. (aEnd - 1), and is
+    // tagged with block indices created from `aStart` and `aEnd`.
+    using EntrySpan = Span<ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      // Empty range, give back a default-constructed writer.
+      return ProfileBufferEntryWriter{};
+    }
+    const auto bufferSize = mMask.MaskValue() + 1;
+    // Don't allow over-wrapping.
+    MOZ_ASSERT(aEnd - aStart <= bufferSize);
+    // Offset of the first byte, in 0 .. (buffer size - 1).
+    const Offset first = static_cast<Offset>(aStart) & mMask;
+    // Offset one past the last byte, in 1 .. (buffer size).
+    const Offset pastLast = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (first >= pastLast) {
+      // The range crosses the buffer threshold: one span runs until the end,
+      // and a second one restarts at the beginning of the buffer.
+      return ProfileBufferEntryWriter{
+          EntrySpan(&mBuffer[first], bufferSize - first),
+          EntrySpan(&mBuffer[0], pastLast),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)};
+    }
+    // The range doesn't cross the buffer threshold, a single span is enough.
+    return ProfileBufferEntryWriter{
+        EntrySpan(&mBuffer[first], pastLast - first),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)};
+  }
+
+ // Emplace an entry writer into `aMaybeEntryWriter` for the given range.
+  void EntryWriterFromTo(Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter,
+                         Index aStart, Index aEnd) const {
+    // `aMaybeEntryWriter` is expected to come in empty; it is left empty when
+    // the requested range is empty.
+    MOZ_ASSERT(aMaybeEntryWriter.isNothing(),
+               "Reference entry writer should be Nothing.");
+    using EntrySpan = Span<ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      return;
+    }
+    const auto bufferSize = mMask.MaskValue() + 1;
+    // Don't allow over-wrapping.
+    MOZ_ASSERT(aEnd - aStart <= bufferSize);
+    // Offset of the first byte, in 0 .. (buffer size - 1).
+    const Offset first = static_cast<Offset>(aStart) & mMask;
+    // Offset one past the last byte, in 1 .. (buffer size).
+    const Offset pastLast = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (first < pastLast) {
+      // The range doesn't cross the buffer threshold, a single span is enough.
+      aMaybeEntryWriter.emplace(
+          EntrySpan(&mBuffer[first], pastLast - first),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd));
+      return;
+    }
+    // The range crosses the buffer threshold: one span runs until the end, and
+    // a second one restarts at the beginning of the buffer.
+    aMaybeEntryWriter.emplace(
+        EntrySpan(&mBuffer[first], bufferSize - first),
+        EntrySpan(&mBuffer[0], pastLast),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd));
+  }
+
+ // All ModuloBuffer operations should be done through this iterator, which has
+ // an effectively infinite range. The underlying wrapping-around is hidden.
+ // Use `ReaderAt(Index)` or `WriterAt(Index)` to create it.
+ //
+ // `const Iterator<...>` means the iterator itself cannot change, i.e., it
+ // cannot move, and only its const methods are available. Note that these
+ // const methods may still be used to modify the buffer contents (e.g.:
+ // `operator*()`, `Poke()`).
+ //
+ // `Iterator</*IsBufferConst=*/true>` means the buffer contents cannot be
+ // modified, i.e., write operations are forbidden, but the iterator may still
+ // move if non-const itself.
+ template <bool IsBufferConst>
+ class Iterator {
+ // Alias to const- or mutable-`ModuloBuffer` depending on `IsBufferConst`.
+ using ConstOrMutableBuffer =
+ std::conditional_t<IsBufferConst, const ModuloBuffer, ModuloBuffer>;
+
+ // Implementation note about the strange enable-if's below:
+ // `template <bool NotIBC = !IsBufferConst> enable_if_t<NotIBC>`
+ // which intuitively could be simplified to:
+ // `enable_if_t<!IsBufferConst>`
+ // The former extra-templated syntax is in fact necessary to delay
+ // instantiation of these functions until they are actually needed.
+ //
+ // If we were just doing `enable_if_t<!IsBufferConst>`, this would only
+ // depend on the *class* (`ModuloBuffer<...>::Iterator`), which gets
+ // instantiated when a `ModuloBuffer` is created with some template
+ // arguments; at that point, all non-templated methods get instantiated, so
+ // there's no "SFINAE" happening, and `enable_if_t<...>` is actually doing
+ // `typename enable_if<...>::type` on the spot, but there is no `type` if
+ // `IsBufferConst` is true, so it just fails right away. E.g.:
+ // error: no type named 'type' in 'std::enable_if<false, void>';
+ // 'enable_if' cannot be used to disable this declaration
+ // note: in instantiation of template type alias 'enable_if_t'
+ // > std::enable_if_t<!IsBufferConst> WriteObject(const T& aObject) {
+ // in instantiation of template class
+ // 'mozilla::ModuloBuffer<...>::Iterator<true>'
+ // > auto it = mb.ReaderAt(1);
+ //
+ // By adding another template level `template <bool NotIsBufferConst =
+ // !IsBufferConst>`, the instantiation is delayed until the function is
+ // actually invoked somewhere, e.g. `it.Poke(...);`.
+ // So at that invocation point, the compiler looks for a "Poke" name in it,
+ // and considers potential template instantiations that could work. The
+ // `enable_if_t` is *now* attempted, with `NotIsBufferConst` taking its
+ // value from `!IsBufferConst`:
+ // - If `IsBufferConst` is false, `NotIsBufferConst` is true,
+ // `enable_if<NotIsBufferConst>` does define a `type` (`void` by default),
+ // so `enable_if_t` happily becomes `void`, the function exists and may be
+ // called.
+ // - Otherwise if `IsBufferConst` is true, `NotIsBufferConst` is false,
+ // `enable_if<NotIsBufferConst>` does *not* define a `type`, therefore
+ // `enable_if_t` produces an error because there is no `type`. Now "SFINAE"
+ // happens: This "Substitution Failure Is Not An Error" (by itself)... But
+ // then, there are no other functions named "Poke" as requested in the
+ // `it.Poke(...);` call, so we are now getting an error (can't find
+ // function), as expected because `it` had `IsBufferConst`==true. (But at
+ // least the compiler waited until this invocation attempt before outputting
+ // an error.)
+ //
+ // C++ is fun!
+
+ public:
+ // These definitions are expected by std functions, to recognize this as an
+ // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+ using difference_type = Index;
+ using value_type = Byte;
+ using pointer = std::conditional_t<IsBufferConst, const Byte*, Byte*>;
+ using reference = std::conditional_t<IsBufferConst, const Byte&, Byte&>;
+ using iterator_category = std::random_access_iterator_tag;
+
+ // Can always copy/assign from the same kind of iterator.
+ Iterator(const Iterator& aRhs) = default;
+ Iterator& operator=(const Iterator& aRhs) = default;
+
+ // Can implicitly copy an Iterator-to-mutable (reader+writer) to
+ // Iterator-to-const (reader-only), but not the reverse.
+ template <bool IsRhsBufferConst,
+ typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>>
+ MOZ_IMPLICIT Iterator(const Iterator<IsRhsBufferConst>& aRhs)
+ : mModuloBuffer(aRhs.mModuloBuffer), mIndex(aRhs.mIndex) {}
+
+ // Can implicitly assign from an Iterator-to-mutable (reader+writer) to
+ // Iterator-to-const (reader-only), but not the reverse.
+ template <bool IsRhsBufferConst,
+ typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>>
+ Iterator& operator=(const Iterator<IsRhsBufferConst>& aRhs) {
+ mModuloBuffer = aRhs.mModuloBuffer;
+ mIndex = aRhs.mIndex;
+ return *this;
+ }
+
+ // Current location of the iterator in the `Index` range.
+ // Note that due to wrapping, multiple indices may effectively point at the
+ // same byte in the buffer.
+ Index CurrentIndex() const { return mIndex; }
+
+ // Location comparison in the `Index` range. I.e., two `Iterator`s may look
+ // unequal, but refer to the same buffer location.
+ // Must be on the same buffer.
+    // `==` and `<` compare the `Index` values directly (after checking, in
+    // debug builds, that both iterators are on the same buffer); the four
+    // other comparisons are expressed through these two.
+    bool operator==(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex == aRhs.mIndex;
+    }
+    bool operator!=(const Iterator& aRhs) const { return !(*this == aRhs); }
+    bool operator<(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex < aRhs.mIndex;
+    }
+    bool operator<=(const Iterator& aRhs) const { return !(aRhs < *this); }
+    bool operator>(const Iterator& aRhs) const { return aRhs < *this; }
+    bool operator>=(const Iterator& aRhs) const { return !(*this < aRhs); }
+
+ // Movement in the `Index` range.
+    // Pre-increment/decrement: move by one and return this iterator.
+    Iterator& operator++() {
+      ++mIndex;
+      return *this;
+    }
+    Iterator& operator--() {
+      --mIndex;
+      return *this;
+    }
+    // Post-increment/decrement: move by one, return a copy of the old state.
+    Iterator operator++(int) {
+      Iterator previous(*this);
+      ++mIndex;
+      return previous;
+    }
+    Iterator operator--(int) {
+      Iterator previous(*this);
+      --mIndex;
+      return previous;
+    }
+    // In-place jumps by `aLength` bytes.
+    Iterator& operator+=(Length aLength) {
+      mIndex += aLength;
+      return *this;
+    }
+    Iterator& operator-=(Length aLength) {
+      mIndex -= aLength;
+      return *this;
+    }
+    // Jumps by `aLength` bytes producing a new iterator; `*this` is untouched.
+    Iterator operator+(Length aLength) const {
+      Iterator moved(*this);
+      moved += aLength;
+      return moved;
+    }
+    friend Iterator operator+(Length aLength, const Iterator& aIt) {
+      return aIt + aLength;
+    }
+    Iterator operator-(Length aLength) const {
+      Iterator moved(*this);
+      moved -= aLength;
+      return moved;
+    }
+
+ // Distance from `aRef` to here in the `Index` range.
+ // May be negative (as 2's complement) if `aRef > *this`.
+ Index operator-(const Iterator& aRef) const {
+ MOZ_ASSERT(mModuloBuffer == aRef.mModuloBuffer);
+ return mIndex - aRef.mIndex;
+ }
+
+ // Dereference a single byte (read-only if `IsBufferConst` is true).
+ reference operator*() const {
+ return mModuloBuffer->mBuffer[OffsetInBuffer()];
+ }
+
+ // Random-access dereference.
+ reference operator[](Length aLength) const { return *(*this + aLength); }
+
+ // Write data (if `IsBufferConst` is false) but don't move iterator.
+    template <bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> Poke(const void* aSrc,
+                                            Length aLength) const {
+      const Length bufferLength = mModuloBuffer->BufferLength().Value();
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= bufferLength);
+      // Where our Index lands inside the buffer.
+      const Offset writeOffset = OffsetInBuffer();
+      // How many bytes fit between that offset and the end of the buffer.
+      const Length bytesToEnd = bufferLength - writeOffset;
+      const Byte* const source = static_cast<const Byte*>(aSrc);
+      if (MOZ_LIKELY(bytesToEnd >= aLength)) {
+        // Everything fits before the end, one copy is enough.
+        memcpy(&mModuloBuffer->mBuffer[writeOffset], source, aLength);
+      } else {
+        // Fill the buffer up to its end first...
+        memcpy(&mModuloBuffer->mBuffer[writeOffset], source, bytesToEnd);
+        // ... and put the rest at the beginning of the buffer.
+        memcpy(&mModuloBuffer->mBuffer[0], source + bytesToEnd,
+               aLength - bytesToEnd);
+      }
+    }
+
+ // Write object data (if `IsBufferConst` is false) but don't move iterator.
+ // Note that this copies bytes from the object, with the intent to read them
+ // back later. Restricted to trivially-copyable types, which support this
+ // without Undefined Behavior!
+ template <typename T, bool NotIsBufferConst = !IsBufferConst>
+ std::enable_if_t<NotIsBufferConst> PokeObject(const T& aObject) const {
+ static_assert(std::is_trivially_copyable<T>::value,
+ "PokeObject<T> - T must be trivially copyable");
+ return Poke(&aObject, sizeof(T));
+ }
+
+ // Write data (if `IsBufferConst` is false) and move iterator ahead.
+ template <bool NotIsBufferConst = !IsBufferConst>
+ std::enable_if_t<NotIsBufferConst> Write(const void* aSrc, Length aLength) {
+ Poke(aSrc, aLength);
+ mIndex += aLength;
+ }
+
+ // Write object data (if `IsBufferConst` is false) and move iterator ahead.
+ // Note that this copies bytes from the object, with the intent to read them
+ // back later. Restricted to trivially-copyable types, which support this
+ // without Undefined Behavior!
+ template <typename T, bool NotIsBufferConst = !IsBufferConst>
+ std::enable_if_t<NotIsBufferConst> WriteObject(const T& aObject) {
+ static_assert(std::is_trivially_copyable<T>::value,
+ "WriteObject<T> - T must be trivially copyable");
+ return Write(&aObject, sizeof(T));
+ }
+
+ // Number of bytes needed to represent `aValue` in unsigned LEB128.
+ template <typename T>
+ static unsigned ULEB128Size(T aValue) {
+ return ::mozilla::ULEB128Size(aValue);
+ }
+
+ // Write number as unsigned LEB128 (if `IsBufferConst` is false) and move
+ // iterator ahead.
+ template <typename T, bool NotIsBufferConst = !IsBufferConst>
+ std::enable_if_t<NotIsBufferConst> WriteULEB128(T aValue) {
+ ::mozilla::WriteULEB128(aValue, *this);
+ }
+
+ // Read data but don't move iterator.
+    void Peek(void* aDst, Length aLength) const {
+      const Length bufferLength = mModuloBuffer->BufferLength().Value();
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= bufferLength);
+      // Where our Index lands inside the buffer.
+      const Offset readOffset = OffsetInBuffer();
+      // How many bytes lie between that offset and the end of the buffer.
+      const Length bytesToEnd = bufferLength - readOffset;
+      Byte* const destination = static_cast<Byte*>(aDst);
+      if (MOZ_LIKELY(bytesToEnd >= aLength)) {
+        // Everything we need sits before the end, one copy is enough.
+        memcpy(destination, &mModuloBuffer->mBuffer[readOffset], aLength);
+      } else {
+        // Take what is available up to the end of the buffer first...
+        memcpy(destination, &mModuloBuffer->mBuffer[readOffset], bytesToEnd);
+        // ... and fetch the rest from the beginning of the buffer.
+        memcpy(destination + bytesToEnd, &mModuloBuffer->mBuffer[0],
+               aLength - bytesToEnd);
+      }
+    }
+
+ // Read data into an object but don't move iterator.
+ // Note that this overwrites `aObject` with bytes from the buffer.
+ // Restricted to trivially-copyable types, which support this without
+ // Undefined Behavior!
+ template <typename T>
+ void PeekIntoObject(T& aObject) const {
+ static_assert(std::is_trivially_copyable<T>::value,
+ "PeekIntoObject<T> - T must be trivially copyable");
+ Peek(&aObject, sizeof(T));
+ }
+
+ // Read data as an object but don't move iterator.
+ // Note that this creates a default `T` first, and then overwrites it with
+ // bytes from the buffer. Restricted to trivially-copyable types, which
+ // support this without Undefined Behavior!
+ template <typename T>
+ T PeekObject() const {
+ static_assert(std::is_trivially_copyable<T>::value,
+ "PeekObject<T> - T must be trivially copyable");
+ T object;
+ PeekIntoObject(object);
+ return object;
+ }
+
+ // Read data and move iterator ahead.
+ void Read(void* aDst, Length aLength) {
+ Peek(aDst, aLength);
+ mIndex += aLength;
+ }
+
+ // Read data into a mutable iterator and move both iterators ahead.
+    void ReadInto(Iterator</* IsBufferConst */ false>& aDst, Length aLength) {
+      const Length sourceLength = mModuloBuffer->BufferLength().Value();
+      // Don't allow data larger than either buffer.
+      MOZ_ASSERT(aLength <= sourceLength);
+      MOZ_ASSERT(aLength <= aDst.mModuloBuffer->BufferLength().Value());
+      // Where our Index lands inside the source buffer.
+      const Offset readOffset = OffsetInBuffer();
+      // How many bytes lie between that offset and the end of the buffer.
+      const Length bytesToEnd = sourceLength - readOffset;
+      if (MOZ_LIKELY(bytesToEnd >= aLength)) {
+        // Everything we need sits before the end, one write is enough.
+        aDst.Write(&mModuloBuffer->mBuffer[readOffset], aLength);
+      } else {
+        // Hand over what is available up to the end of the buffer first...
+        aDst.Write(&mModuloBuffer->mBuffer[readOffset], bytesToEnd);
+        // ... and then the rest, from the beginning of the buffer.
+        aDst.Write(&mModuloBuffer->mBuffer[0], aLength - bytesToEnd);
+      }
+      // `aDst` was advanced by its `Write` calls; advance ourselves as well.
+      mIndex += aLength;
+    }
+
+ // Read data into an object and move iterator ahead.
+ // Note that this overwrites `aObject` with bytes from the buffer.
+ // Restricted to trivially-copyable types, which support this without
+ // Undefined Behavior!
+ template <typename T>
+ void ReadIntoObject(T& aObject) {
+ static_assert(std::is_trivially_copyable<T>::value,
+ "ReadIntoObject<T> - T must be trivially copyable");
+ Read(&aObject, sizeof(T));
+ }
+
+ // Read data as an object and move iterator ahead.
+ // Note that this creates a default `T` first, and then overwrites it with
+ // bytes from the buffer. Restricted to trivially-copyable types, which
+ // support this without Undefined Behavior!
+ template <typename T>
+ T ReadObject() {
+ static_assert(std::is_trivially_copyable<T>::value,
+ "ReadObject<T> - T must be trivially copyable");
+ T object;
+ ReadIntoObject(object);
+ return object;
+ }
+
+ // Read an unsigned LEB128 number and move iterator ahead.
+ template <typename T>
+ T ReadULEB128() {
+ return ::mozilla::ReadULEB128<T>(*this);
+ }
+
+ private:
+ // Only a ModuloBuffer can instantiate its iterator.
+ friend class ModuloBuffer;
+
+ Iterator(ConstOrMutableBuffer& aBuffer, Index aIndex)
+ : mModuloBuffer(WrapNotNull(&aBuffer)), mIndex(aIndex) {}
+
+ // Convert the Iterator's mIndex into an offset inside the byte buffer.
+ Offset OffsetInBuffer() const {
+ return static_cast<Offset>(mIndex) & mModuloBuffer->mMask;
+ }
+
+ // ModuloBuffer that this Iterator operates on.
+ // Using a non-null pointer instead of a reference, to allow re-assignment
+ // of an Iterator variable.
+ NotNull<ConstOrMutableBuffer*> mModuloBuffer;
+
+ // Position of this iterator in the wider `Index` range. (Will be wrapped
+ // around as needed when actually accessing bytes from the buffer.)
+ Index mIndex;
+ };
+
+ // Shortcut to iterator to const (read-only) data.
+ using Reader = Iterator<true>;
+ // Shortcut to iterator to non-const (read/write) data.
+ using Writer = Iterator<false>;
+
+ // Create an iterator to const data at the given index.
+ Reader ReaderAt(Index aIndex) const { return Reader(*this, aIndex); }
+
+ // Create an iterator to non-const data at the given index.
+ Writer WriterAt(Index aIndex) { return Writer(*this, aIndex); }
+
+#ifdef DEBUG
+  void Dump() const {
+    // Debug helper: print the first bytes of the buffer (at most 128) to
+    // stdout, as space-separated two-digit hex values followed by a newline.
+    const Length bufferLength = BufferLength().Value();
+    const Length shown = (bufferLength > 128) ? 128 : bufferLength;
+    for (Length offset = 0; offset < shown; ++offset) {
+      printf("%02x ", mBuffer[offset]);
+    }
+    printf("\n");
+  }
+#endif // DEBUG
+
+ private:
+ // Mask used to convert an index to an offset in `mBuffer`
+ const PowerOfTwoMask<Offset> mMask;
+
+ // Buffer data. `const NotNull<...>` shows that `mBuffer` is `const`, and
+ // `Byte* const` shows that the pointer cannot be changed to point at
+ // something else, but the pointed-at `Byte`s are writable.
+ const NotNull<Byte* const> mBuffer;
+
+ // Function used to release the buffer resource (if needed).
+ std::function<void(Byte*)> mBufferDeleter;
+};
+
+} // namespace mozilla
+
+#endif // ModuloBuffer_h
diff --git a/mozglue/baseprofiler/public/PowerOfTwo.h b/mozglue/baseprofiler/public/PowerOfTwo.h
new file mode 100644
index 0000000000..7d396c15e6
--- /dev/null
+++ b/mozglue/baseprofiler/public/PowerOfTwo.h
@@ -0,0 +1,322 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// PowerOfTwo is a value type that always holds a power of 2.
+// It has the same size as its underlying unsigned type, but offers the
+// guarantee of being a power of 2, which permits some optimizations when
+// involved in modulo operations (using masking instead of actual modulo).
+//
+// PowerOfTwoMask contains a mask corresponding to a power of 2.
+// E.g., 2^8 is 256 or 0x100, the corresponding mask is 2^8-1 or 255 or 0xFF.
+// It should be used instead of PowerOfTwo in situations where most operations
+// would be modulo, this saves having to recompute the mask from the stored
+// power of 2.
+//
+// One common use would be for ring-buffer containers with a power-of-2 size,
+// where an index is usually converted to an in-buffer offset by `i % size`.
+// Instead, the container could store a PowerOfTwo or PowerOfTwoMask, and do
+// `i % p2` or `i & p2m`, which is more efficient than for arbitrary sizes.
+//
+// Shortcuts for common 32- and 64-bit values: PowerOfTwo32, etc.
+//
+// To create constexpr constants, use MakePowerOfTwo<Type, Value>(), etc.
+
+#ifndef PowerOfTwo_h
+#define PowerOfTwo_h
+
+#include "mozilla/MathAlgorithms.h"
+
+#include <limits>
+
+namespace mozilla {
+
+// Compute the smallest power of 2 greater than or equal to aInput, except if
+// that would overflow in which case the highest possible power of 2 is chosen.
+// 0->1, 1->1, 2->2, 3->4, ... 2^31->2^31, 2^31+1->2^31 (for uint32_t), etc.
+template <typename T>
+T FriendlyRoundUpPow2(T aInput) {
+ // This is the same code as `RoundUpPow2()`, except we handle any type (that
+ // CeilingLog2 supports) and allow the greater-than-max-power case.
+ constexpr T max = T(1) << (sizeof(T) * CHAR_BIT - 1);
+ if (aInput >= max) {
+ return max;
+ }
+ return T(1) << CeilingLog2(aInput);
+}
+
+namespace detail {
+// Same function name `CountLeadingZeroes` with uint32_t and uint64_t overloads.
+inline uint_fast8_t CountLeadingZeroes(uint32_t aValue) {
+ MOZ_ASSERT(aValue != 0);
+ return detail::CountLeadingZeroes32(aValue);
+}
+inline uint_fast8_t CountLeadingZeroes(uint64_t aValue) {
+ MOZ_ASSERT(aValue != 0);
+ return detail::CountLeadingZeroes64(aValue);
+}
+// Refuse anything else.
+template <typename T>
+inline uint_fast8_t CountLeadingZeroes(T aValue) = delete;
+} // namespace detail
+
+// Compute the smallest 2^N-1 mask where aInput can fit.
+// I.e., `x & mask == x`, but `x & (mask >> 1) != x`.
+// Or looking at binary, we want a mask with as many leading zeroes as the
+// input, by right-shifting a full mask: (8-bit examples)
+// input: 00000000 00000001 00000010 00010110 01111111 10000000
+// N leading 0s: ^^^^^^^^ 8 ^^^^^^^ 7 ^^^^^^ 6 ^^^ 3 ^ 1 0
+// full mask: 11111111 11111111 11111111 11111111 11111111 11111111
+// full mask >> N: 00000000 00000001 00000011 00011111 01111111 11111111
+template <typename T>
+T RoundUpPow2Mask(T aInput) {
+ // Special case, as CountLeadingZeroes(0) is undefined. (And even if that was
+ // defined, shifting by the full type size is also undefined!)
+ if (aInput == 0) {
+ return 0;
+ }
+ return T(-1) >> detail::CountLeadingZeroes(aInput);
+}
+
+template <typename T>
+class PowerOfTwoMask;
+
+template <typename T, T Mask>
+constexpr PowerOfTwoMask<T> MakePowerOfTwoMask();
+
+template <typename T>
+class PowerOfTwo;
+
+template <typename T, T Value>
+constexpr PowerOfTwo<T> MakePowerOfTwo();
+
+// PowerOfTwoMask will always contain a mask for a power of 2, which is useful
+// for power-of-2 modulo operations (e.g., to keep an index inside a power-of-2
+// container).
+// Use this instead of PowerOfTwo if masking is the primary use of the value.
+//
+// Note that this class can store a "full" mask where all bits are set, so it
+// works for mask corresponding to the power of 2 that would overflow `T`
+// (e.g., 2^32 for uint32_t gives a mask of 2^32-1, which fits in a uint32_t).
+// For this reason there is no API that computes the power of 2 corresponding to
+// the mask; But this can be done explicitly with `MaskValue() + 1`, which may
+// be useful for computing things like distance-to-the-end by doing
+// `MaskValue() + 1 - offset`, which works fine with unsigned number types.
+template <typename T>
+class PowerOfTwoMask {
+ static_assert(!std::numeric_limits<T>::is_signed,
+ "PowerOfTwoMask must use an unsigned type");
+
+ public:
+ // Construct a power of 2 mask where the given value can fit.
+ // Cannot be constexpr because of `RoundUpPow2Mask()`.
+ explicit PowerOfTwoMask(T aInput) : mMask(RoundUpPow2Mask(aInput)) {}
+
+ // Compute the mask corresponding to a PowerOfTwo.
+ // This saves having to compute the nearest 2^N-1.
+ // Not a conversion constructor, as that could be ambiguous whether we'd want
+ // the mask corresponding to the power of 2 (2^N -> 2^N-1), or the mask that
+ // can *contain* the PowerOfTwo value (2^N -> 2^(N+1)-1).
+ // Note: Not offering reverse PowerOfTwoMask-to-PowerOfTwo conversion, because
+ // that could result in an unexpected 0 result for the largest possible mask.
+ template <typename U>
+ static constexpr PowerOfTwoMask<U> MaskForPowerOfTwo(
+ const PowerOfTwo<U>& aP2) {
+ return aP2.Mask();  // Exact 2^N -> 2^N-1 mask, built by PowerOfTwo itself.
+ }
+
+ // Allow smaller unsigned types as input.
+ // Bigger or signed types must be explicitly converted by the caller.
+ template <typename U>
+ explicit constexpr PowerOfTwoMask(U aInput)
+ : mMask(RoundUpPow2Mask(static_cast<T>(aInput))) {
+ static_assert(!std::numeric_limits<T>::is_signed,
+ "PowerOfTwoMask does not accept signed types");
+ static_assert(sizeof(U) <= sizeof(T),
+ "PowerOfTwoMask does not accept bigger types");
+ }
+
+ constexpr T MaskValue() const { return mMask; }
+
+ // `x & aPowerOfTwoMask` just works.
+ template <typename U>
+ friend U operator&(U aNumber, PowerOfTwoMask aP2M) {
+ return static_cast<U>(aNumber & aP2M.MaskValue());
+ }
+
+ // `aPowerOfTwoMask & x` just works.
+ template <typename U>
+ friend constexpr U operator&(PowerOfTwoMask aP2M, U aNumber) {
+ return static_cast<U>(aP2M.MaskValue() & aNumber);
+ }
+
+ // `x % aPowerOfTwoMask(2^N-1)` is equivalent to `x % 2^N` but is more
+ // optimal by doing `x & (2^N-1)`.
+ // Useful for templated code doing modulo with a template argument type.
+ template <typename U>
+ friend constexpr U operator%(U aNumerator, PowerOfTwoMask aDenominator) {
+ return aNumerator & aDenominator.MaskValue();
+ }
+
+ constexpr bool operator==(const PowerOfTwoMask& aRhs) const {
+ return mMask == aRhs.mMask;
+ }
+ constexpr bool operator!=(const PowerOfTwoMask& aRhs) const {
+ return mMask != aRhs.mMask;
+ }
+
+ private:
+ // Trust `PowerOfTwo` to call the private Trusted constructor below.
+ friend class PowerOfTwo<T>;
+
+ // Trust `MakePowerOfTwoMask()` to call the private Trusted constructor below.
+ template <typename U, U Mask>
+ friend constexpr PowerOfTwoMask<U> MakePowerOfTwoMask();
+
+ struct Trusted {
+ T mMask;
+ };
+ // Construct the mask corresponding to a PowerOfTwo.
+ // This saves having to compute the nearest 2^N-1.
+ // Note: Not a public PowerOfTwo->PowerOfTwoMask conversion constructor, as
+ // that could be ambiguous whether we'd want the mask corresponding to the
+ // power of 2 (2^N -> 2^N-1), or the mask that can *contain* the PowerOfTwo
+ // value (2^N -> 2^(N+1)-1).
+ explicit constexpr PowerOfTwoMask(const Trusted& aP2) : mMask(aP2.mMask) {}
+
+ T mMask = 0;
+};
+
+// Make a PowerOfTwoMask constant, statically-checked.
+template <typename T, T Mask>
+constexpr PowerOfTwoMask<T> MakePowerOfTwoMask() {
+ static_assert(Mask == T(-1) || IsPowerOfTwo(Mask + 1),
+ "MakePowerOfTwoMask<T, Mask>: Mask must be 2^N-1");
+ using Trusted = typename PowerOfTwoMask<T>::Trusted;
+ return PowerOfTwoMask<T>(Trusted{Mask});
+}
+
+// PowerOfTwo will always contain a power of 2.
+template <typename T>
+class PowerOfTwo {
+ static_assert(!std::numeric_limits<T>::is_signed,
+ "PowerOfTwo must use an unsigned type");
+
+ public:
+ // Construct a power of 2 that can fit the given value, or the highest power
+ // of 2 possible.
+ // Caller should explicitly check/assert `aInput <= Value()` if they want to.
+ // Cannot be constexpr because of `FriendlyRoundUpPow2()`.
+ explicit PowerOfTwo(T aInput) : mValue(FriendlyRoundUpPow2(aInput)) {}
+
+ // Allow smaller unsigned types as input.
+ // Bigger or signed types must be explicitly converted by the caller.
+ template <typename U>
+ explicit PowerOfTwo(U aInput)
+ : mValue(FriendlyRoundUpPow2(static_cast<T>(aInput))) {
+ static_assert(!std::numeric_limits<T>::is_signed,
+ "PowerOfTwo does not accept signed types");
+ static_assert(sizeof(U) <= sizeof(T),
+ "PowerOfTwo does not accept bigger types");
+ }
+
+ constexpr T Value() const { return mValue; }
+
+ // Binary mask corresponding to the power of 2, useful for modulo.
+ // E.g., `x & powerOfTwo(y).Mask()` == `x % powerOfTwo(y)`.
+ // Consider PowerOfTwoMask class instead of PowerOfTwo if masking is the
+ // primary use case.
+ constexpr T MaskValue() const { return mValue - 1; }
+
+ // PowerOfTwoMask corresponding to this power of 2, useful for modulo.
+ constexpr PowerOfTwoMask<T> Mask() const {
+ using Trusted = typename PowerOfTwoMask<T>::Trusted;
+ return PowerOfTwoMask<T>(Trusted{MaskValue()});
+ }
+
+ // `x % aPowerOfTwo` works optimally.
+ // Useful for templated code doing modulo with a template argument type.
+ // Use PowerOfTwoMask class instead if masking is the primary use case.
+ template <typename U>
+ friend constexpr U operator%(U aNumerator, PowerOfTwo aDenominator) {
+ return aNumerator & aDenominator.MaskValue();
+ }
+
+ constexpr bool operator==(const PowerOfTwo& aRhs) const {
+ return mValue == aRhs.mValue;
+ }
+ constexpr bool operator!=(const PowerOfTwo& aRhs) const {
+ return mValue != aRhs.mValue;
+ }
+ constexpr bool operator<(const PowerOfTwo& aRhs) const {
+ return mValue < aRhs.mValue;
+ }
+ constexpr bool operator<=(const PowerOfTwo& aRhs) const {
+ return mValue <= aRhs.mValue;
+ }
+ constexpr bool operator>(const PowerOfTwo& aRhs) const {
+ return mValue > aRhs.mValue;
+ }
+ constexpr bool operator>=(const PowerOfTwo& aRhs) const {
+ return mValue >= aRhs.mValue;
+ }
+
+ private:
+ // Trust `MakePowerOfTwo()` to call the private Trusted constructor below.
+ template <typename U, U Value>
+ friend constexpr PowerOfTwo<U> MakePowerOfTwo();
+
+ struct Trusted {
+ T mValue;
+ };
+ // Construct a PowerOfTwo with the given trusted value.
+ // This saves having to compute the nearest 2^N.
+ // Note: Not offering PowerOfTwoMask-to-PowerOfTwo conversion, because that
+ // could result in an unexpected 0 result for the largest possible mask.
+ explicit constexpr PowerOfTwo(const Trusted& aP2) : mValue(aP2.mValue) {}
+
+ // The smallest power of 2 is 2^0 == 1.
+ T mValue = 1;
+};
+
+// Make a PowerOfTwo constant, statically-checked.
+template <typename T, T Value>
+constexpr PowerOfTwo<T> MakePowerOfTwo() {
+ static_assert(IsPowerOfTwo(Value),
+ "MakePowerOfTwo<T, Value>: Value must be 2^N");
+ using Trusted = typename PowerOfTwo<T>::Trusted;
+ return PowerOfTwo<T>(Trusted{Value});
+}
+
+// Shortcuts for the most common types and functions.
+
+using PowerOfTwoMask32 = PowerOfTwoMask<uint32_t>;
+using PowerOfTwo32 = PowerOfTwo<uint32_t>;
+using PowerOfTwoMask64 = PowerOfTwoMask<uint64_t>;
+using PowerOfTwo64 = PowerOfTwo<uint64_t>;
+
+template <uint32_t Mask>
+constexpr PowerOfTwoMask32 MakePowerOfTwoMask32() {
+ return MakePowerOfTwoMask<uint32_t, Mask>();
+}
+
+template <uint32_t Value>
+constexpr PowerOfTwo32 MakePowerOfTwo32() {
+ return MakePowerOfTwo<uint32_t, Value>();
+}
+
+template <uint64_t Mask>
+constexpr PowerOfTwoMask64 MakePowerOfTwoMask64() {
+ return MakePowerOfTwoMask<uint64_t, Mask>();
+}
+
+template <uint64_t Value>
+constexpr PowerOfTwo64 MakePowerOfTwo64() {
+ return MakePowerOfTwo<uint64_t, Value>();
+}
+
+} // namespace mozilla
+
+#endif // PowerOfTwo_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunk.h b/mozglue/baseprofiler/public/ProfileBufferChunk.h
new file mode 100644
index 0000000000..9ba2483372
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunk.h
@@ -0,0 +1,547 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunk_h
+#define ProfileBufferChunk_h
+
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/Span.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#if defined(MOZ_MEMORY)
+# include "mozmemory.h"
+#endif
+
+#include <algorithm>
+#include <limits>
+#include <type_traits>
+
+#ifdef DEBUG
+# include <cstdio>
+#endif
+
+namespace mozilla {
+
+// Represents a single chunk of memory, with a link to the next chunk (or null).
+//
+// A chunk is made of an internal header (which contains a public part) followed
+// by user-accessible bytes.
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private | memory containing user blocks |
+// +---------------+---------+----------------------------------------------+
+// <---------------BufferBytes()------------------>
+// <------------------------------ChunkBytes()------------------------------>
+//
+// The chunk can reserve "blocks", but doesn't know the internal contents of
+// each block, it only knows where the first one starts, and where the last one
+// ends (which is where the next one will begin, if not already out of range).
+// It is up to the user to add structure to each block so that they can be
+// distinguished when later read.
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private | [1st block]...[last full block] |
+// +---------------+---------+----------------------------------------------+
+// ChunkHeader().mOffsetFirstBlock ^ ^
+// ChunkHeader().mOffsetPastLastBlock --'
+//
+// It is possible to attempt to reserve more than the remaining space, in which
+// case only what is available is returned. The caller is responsible for using
+// another chunk, reserving a block "tail" in it, and using both parts to
+// constitute a full block. (This initial tail may be empty in some chunks.)
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private | tail][1st block]...[last full block][head... |
+// +---------------+---------+----------------------------------------------+
+// ChunkHeader().mOffsetFirstBlock ^ ^
+// ChunkHeader().mOffsetPastLastBlock --'
+//
+// Each Chunk has an internal state (checked in DEBUG builds) that directs how
+// to use it during creation, initialization, use, end of life, recycling, and
+// destruction. See `State` below for details.
+// In particular:
+// - `ReserveInitialBlockAsTail()` must be called before the first `Reserve()`
+// after construction or recycling, even with a size of 0 (no actual tail),
+// - `MarkDone()` and `MarkRecycled()` must be called as appropriate.
+class ProfileBufferChunk {
+ public:
+ using Byte = uint8_t;
+ using Length = uint32_t;
+
+ using SpanOfBytes = Span<Byte>;
+
+ // Hint about the size of the metadata (public and private headers).
+ // `Create()` below takes the minimum *buffer* size, so the minimum total
+ // Chunk size is at least `SizeofChunkMetadata() + aMinBufferBytes`.
+ [[nodiscard]] static constexpr Length SizeofChunkMetadata() {
+ return static_cast<Length>(sizeof(InternalHeader));
+ }
+
+ // Allocate space for a chunk with a given minimum size, and construct it.
+ // The actual size may be higher, to match the actual space taken in the
+ // memory pool.
+ [[nodiscard]] static UniquePtr<ProfileBufferChunk> Create(
+ Length aMinBufferBytes) {
+ // We need at least one byte, to cover the always-present `mBuffer` byte.
+ aMinBufferBytes = std::max(aMinBufferBytes, Length(1));
+ // Trivial struct with the same alignment as `ProfileBufferChunk`, and size
+ // equal to that alignment, because typically the sizeof of an object is
+ // a multiple of its alignment.
+ struct alignas(alignof(InternalHeader)) ChunkStruct {
+ Byte c[alignof(InternalHeader)];
+ };
+ static_assert(std::is_trivial_v<ChunkStruct>,
+ "ChunkStruct must be trivial to avoid any construction");
+ // Allocate an array of that struct, enough to contain the expected
+ // `ProfileBufferChunk` (with its header+buffer).
+ size_t count = (sizeof(InternalHeader) + aMinBufferBytes +
+ (alignof(InternalHeader) - 1)) /
+ alignof(InternalHeader);
+#if defined(MOZ_MEMORY)
+ // Potentially expand the array to use more of the effective allocation.
+ count = (malloc_good_size(count * sizeof(ChunkStruct)) +
+ (sizeof(ChunkStruct) - 1)) /
+ sizeof(ChunkStruct);
+#endif
+ auto chunkStorage = MakeUnique<ChunkStruct[]>(count);
+ MOZ_ASSERT(reinterpret_cast<uintptr_t>(chunkStorage.get()) %
+ alignof(InternalHeader) ==
+ 0);
+ // After the allocation, compute the actual chunk size (including header).
+ const size_t chunkBytes = count * sizeof(ChunkStruct);
+ MOZ_ASSERT(chunkBytes >= sizeof(ProfileBufferChunk),
+ "Not enough space to construct a ProfileBufferChunk");
+ MOZ_ASSERT(chunkBytes <=
+ static_cast<size_t>(std::numeric_limits<Length>::max()));
+ // Compute the size of the user-accessible buffer inside the chunk.
+ const Length bufferBytes =
+ static_cast<Length>(chunkBytes - sizeof(InternalHeader));
+ MOZ_ASSERT(bufferBytes >= aMinBufferBytes,
+ "Not enough space for minimum buffer size");
+ // Construct the header at the beginning of the allocated array, with the
+ // known buffer size.
+ new (chunkStorage.get()) ProfileBufferChunk(bufferBytes);
+ // We now have a proper `ProfileBufferChunk` object, create the appropriate
+ // UniquePtr for it.
+ UniquePtr<ProfileBufferChunk> chunk{
+ reinterpret_cast<ProfileBufferChunk*>(chunkStorage.release())};
+ MOZ_ASSERT(
+ size_t(reinterpret_cast<const char*>(
+ &chunk.get()->BufferSpan()[bufferBytes - 1]) -
+ reinterpret_cast<const char*>(chunk.get())) == chunkBytes - 1,
+ "Buffer span spills out of chunk allocation");
+ return chunk;
+ }
+
+#ifdef DEBUG
+ ~ProfileBufferChunk() {
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+ MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+ mInternalHeader.mState == InternalHeader::State::Done ||
+ mInternalHeader.mState == InternalHeader::State::Recycled);
+ }
+#endif
+
+ // Must be called with the first block tail (may be empty), which will be
+ // skipped if the reader starts with this ProfileBufferChunk.
+ [[nodiscard]] SpanOfBytes ReserveInitialBlockAsTail(Length aTailSize) {
+#ifdef DEBUG
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+ MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+ mInternalHeader.mState == InternalHeader::State::Recycled);
+ mInternalHeader.mState = InternalHeader::State::InUse;
+#endif
+ mInternalHeader.mHeader.mOffsetFirstBlock = aTailSize;
+ mInternalHeader.mHeader.mOffsetPastLastBlock = aTailSize;
+ mInternalHeader.mHeader.mStartTimeStamp = TimeStamp::Now();
+ return SpanOfBytes(&mBuffer, aTailSize);
+ }
+
+ struct ReserveReturn {
+ SpanOfBytes mSpan;
+ ProfileBufferBlockIndex mBlockRangeIndex;
+ };
+
+ // Reserve a block of up to `aBlockSize` bytes, and return a Span to it, and
+ // its starting index. The actual size may be smaller, if the block cannot fit
+ // in the remaining space.
+ [[nodiscard]] ReserveReturn ReserveBlock(Length aBlockSize) {
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+ MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse);
+ MOZ_ASSERT(RangeStart() != 0,
+ "Expected valid range start before first Reserve()");
+ const Length blockOffset = mInternalHeader.mHeader.mOffsetPastLastBlock;
+ Length reservedSize = aBlockSize;
+ if (MOZ_UNLIKELY(aBlockSize >= RemainingBytes())) {
+ reservedSize = RemainingBytes();
+#ifdef DEBUG
+ mInternalHeader.mState = InternalHeader::State::Full;
+#endif
+ }
+ mInternalHeader.mHeader.mOffsetPastLastBlock += reservedSize;
+ mInternalHeader.mHeader.mBlockCount += 1;
+ return {SpanOfBytes(&mBuffer + blockOffset, reservedSize),
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mInternalHeader.mHeader.mRangeStart + blockOffset)};
+ }
+
+ // When a chunk will not be used to store more blocks (because it is full, or
+ // because the profiler will not add more data), it should be marked "done".
+ // Access to its content is still allowed.
+ void MarkDone() {
+#ifdef DEBUG
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+ MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse ||
+ mInternalHeader.mState == InternalHeader::State::Full);
+ mInternalHeader.mState = InternalHeader::State::Done;
+#endif
+ mInternalHeader.mHeader.mDoneTimeStamp = TimeStamp::Now();
+ }
+
+ // A "Done" chunk may be recycled, to avoid allocating a new one.
+ void MarkRecycled() {
+#ifdef DEBUG
+ // We also allow Created and already-Recycled chunks to be recycled, this
+ // way it's easier to recycle chunks when their state is not easily
+ // trackable.
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+ MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+ MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+ mInternalHeader.mState == InternalHeader::State::Done ||
+ mInternalHeader.mState == InternalHeader::State::Recycled);
+ mInternalHeader.mState = InternalHeader::State::Recycled;
+#endif
+ // Reset all header fields, in case this recycled chunk gets read.
+ mInternalHeader.mHeader.Reset();
+ }
+
+ // Public header, meant to uniquely identify a chunk, it may be shared with
+ // other processes to coordinate global memory handling.
+ struct Header {
+ explicit Header(Length aBufferBytes) : mBufferBytes(aBufferBytes) {}
+
+ // Reset all members to their as-new values (apart from the buffer size,
+ // which cannot change), ready for re-use.
+ void Reset() {
+ mOffsetFirstBlock = 0;
+ mOffsetPastLastBlock = 0;
+ mStartTimeStamp = TimeStamp{};
+ mDoneTimeStamp = TimeStamp{};
+ mBlockCount = 0;
+ mRangeStart = 0;
+ mProcessId = 0;
+ }
+
+ // Note: Part of the ordering of members below is to avoid unnecessary
+ // padding.
+
+ // Members managed by the ProfileBufferChunk.
+
+ // Offset of the first block (past the initial tail block, which may be 0).
+ Length mOffsetFirstBlock = 0;
+ // Offset past the last byte of the last reserved block
+ // It may be past mBufferBytes when last block continues in the next
+ // ProfileBufferChunk. It may be before mBufferBytes if ProfileBufferChunk
+ // is marked "Done" before the end is reached.
+ Length mOffsetPastLastBlock = 0;
+ // Timestamp when the buffer becomes in-use, ready to record data.
+ TimeStamp mStartTimeStamp;
+ // Timestamp when the buffer is "Done" (which happens when the last block is
+ // written). This will be used to find and discard the oldest
+ // ProfileBufferChunk.
+ TimeStamp mDoneTimeStamp;
+ // Number of bytes in the buffer, set once at construction time.
+ const Length mBufferBytes;
+ // Number of reserved blocks (including final one even if partial, but
+ // excluding initial tail).
+ Length mBlockCount = 0;
+
+ // Meta-data set by the user.
+
+ // Index of the first byte of this ProfileBufferChunk, relative to all
+ // Chunks for this process. Index 0 is reserved as nullptr-like index,
+ // mRangeStart should be set to a non-0 value before the first `Reserve()`.
+ ProfileBufferIndex mRangeStart = 0;
+ // Process writing to this ProfileBufferChunk.
+ int mProcessId = 0;
+
+ // A bit of spare space (necessary here because of the alignment due to
+ // other members), may be later repurposed for extra data.
+ const int mPADDING = 0;
+ };
+
+ [[nodiscard]] const Header& ChunkHeader() const {
+ return mInternalHeader.mHeader;
+ }
+
+ [[nodiscard]] Length BufferBytes() const {
+ return ChunkHeader().mBufferBytes;
+ }
+
+ // Total size of the chunk (buffer + header).
+ [[nodiscard]] Length ChunkBytes() const {
+ return static_cast<Length>(sizeof(InternalHeader)) + BufferBytes();
+ }
+
+ // Size of external resources, in this case all the following chunks.
+ [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+ const ProfileBufferChunk* const next = GetNext();
+ return next ? next->SizeOfIncludingThis(aMallocSizeOf) : 0;
+ }
+
+ // Size of this chunk and all following ones.
+ [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+ // Just in case `aMallocSizeOf` falls back on just `sizeof`, make sure we
+ // account for at least the actual Chunk requested allocation size.
+ return std::max<size_t>(aMallocSizeOf(this), ChunkBytes()) +
+ SizeOfExcludingThis(aMallocSizeOf);
+ }
+
+ [[nodiscard]] Length RemainingBytes() const {
+ return BufferBytes() - OffsetPastLastBlock();
+ }
+
+ [[nodiscard]] Length OffsetFirstBlock() const {
+ return ChunkHeader().mOffsetFirstBlock;
+ }
+
+ [[nodiscard]] Length OffsetPastLastBlock() const {
+ return ChunkHeader().mOffsetPastLastBlock;
+ }
+
+ [[nodiscard]] Length BlockCount() const { return ChunkHeader().mBlockCount; }
+
+ [[nodiscard]] int ProcessId() const { return ChunkHeader().mProcessId; }
+
+ void SetProcessId(int aProcessId) {
+ mInternalHeader.mHeader.mProcessId = aProcessId;
+ }
+
+ // Global range index at the start of this Chunk.
+ [[nodiscard]] ProfileBufferIndex RangeStart() const {
+ return ChunkHeader().mRangeStart;
+ }
+
+ void SetRangeStart(ProfileBufferIndex aRangeStart) {
+ mInternalHeader.mHeader.mRangeStart = aRangeStart;
+ }
+
+ // Get a read-only Span to the buffer. It is up to the caller to decipher the
+ // contents, based on known offsets and the internal block structure.
+ [[nodiscard]] Span<const Byte> BufferSpan() const {
+ return Span<const Byte>(&mBuffer, BufferBytes());
+ }
+
+ [[nodiscard]] Byte ByteAt(Length aOffset) const {
+ MOZ_ASSERT(aOffset < OffsetPastLastBlock());
+ return *(&mBuffer + aOffset);
+ }
+
+ [[nodiscard]] ProfileBufferChunk* GetNext() {
+ return mInternalHeader.mNext.get();
+ }
+ [[nodiscard]] const ProfileBufferChunk* GetNext() const {
+ return mInternalHeader.mNext.get();
+ }
+
+ [[nodiscard]] UniquePtr<ProfileBufferChunk> ReleaseNext() {
+ return std::move(mInternalHeader.mNext);
+ }
+
+ void InsertNext(UniquePtr<ProfileBufferChunk>&& aChunk) {
+ if (!aChunk) {
+ return;
+ }
+ aChunk->SetLast(ReleaseNext());
+ mInternalHeader.mNext = std::move(aChunk);
+ }
+
+ // Find the last chunk in this chain (it may be `this`).
+ [[nodiscard]] ProfileBufferChunk* Last() {
+ ProfileBufferChunk* chunk = this;
+ for (;;) {
+ ProfileBufferChunk* next = chunk->GetNext();
+ if (!next) {
+ return chunk;
+ }
+ chunk = next;
+ }
+ }
+ [[nodiscard]] const ProfileBufferChunk* Last() const {
+ const ProfileBufferChunk* chunk = this;
+ for (;;) {
+ const ProfileBufferChunk* next = chunk->GetNext();
+ if (!next) {
+ return chunk;
+ }
+ chunk = next;
+ }
+ }
+
+ void SetLast(UniquePtr<ProfileBufferChunk>&& aChunk) {
+ if (!aChunk) {
+ return;
+ }
+ Last()->mInternalHeader.mNext = std::move(aChunk);
+ }
+
+ // Join two possibly-null chunk lists.
+ [[nodiscard]] static UniquePtr<ProfileBufferChunk> Join(
+ UniquePtr<ProfileBufferChunk>&& aFirst,
+ UniquePtr<ProfileBufferChunk>&& aLast) {
+ if (aFirst) {
+ aFirst->SetLast(std::move(aLast));
+ return std::move(aFirst);
+ }
+ return std::move(aLast);
+ }
+
+#ifdef DEBUG
+ void Dump(std::FILE* aFile = stdout) const {
+ fprintf(aFile,
+ "Chunk[%p] chunkSize=%u bufferSize=%u state=%s rangeStart=%u "
+ "firstBlockOffset=%u offsetPastLastBlock=%u blockCount=%u",
+ this, unsigned(ChunkBytes()), unsigned(BufferBytes()),
+ mInternalHeader.StateString(), unsigned(RangeStart()),
+ unsigned(OffsetFirstBlock()), unsigned(OffsetPastLastBlock()),
+ unsigned(BlockCount()));
+ const auto len = OffsetPastLastBlock();
+ constexpr unsigned columns = 16;
+ unsigned char ascii[columns + 1];
+ ascii[columns] = '\0';
+ for (Length i = 0; i < len; ++i) {
+ if (i % columns == 0) {
+ fprintf(aFile, "\n %4u=0x%03x:", unsigned(i), unsigned(i));
+ for (unsigned a = 0; a < columns; ++a) {
+ ascii[a] = ' ';
+ }
+ }
+ unsigned char sep = ' ';
+ if (i == OffsetFirstBlock()) {
+ if (i == OffsetPastLastBlock()) {
+ sep = '#';
+ } else {
+ sep = '[';
+ }
+ } else if (i == OffsetPastLastBlock()) {
+ sep = ']';
+ }
+ unsigned char c = *(&mBuffer + i);
+ fprintf(aFile, "%c%02x", sep, c);
+
+ if (i == len - 1) {
+ if (i + 1 == OffsetPastLastBlock()) {
+ // Special case when last block ends right at the end.
+ fprintf(aFile, "]");
+ } else {
+ fprintf(aFile, " ");
+ }
+ } else if (i % columns == columns - 1) {
+ fprintf(aFile, " ");
+ }
+
+ ascii[i % columns] = (c >= ' ' && c <= '~') ? c : '.';
+
+ if (i % columns == columns - 1) {
+ fprintf(aFile, " %s", ascii);
+ }
+ }
+
+ if (len % columns != 0) {  // Partial last row: pad, then print its ASCII.
+ for (Length i = len % columns; i < columns; ++i) {
+ fprintf(aFile, " ");
+ }
+ fprintf(aFile, " %s", ascii);
+ }
+
+ fprintf(aFile, "\n");
+ }
+#endif // DEBUG
+
+ private:
+ // ProfileBufferChunk constructor. Use static `Create()` to allocate and
+ // construct a ProfileBufferChunk.
+ explicit ProfileBufferChunk(Length aBufferBytes)
+ : mInternalHeader(aBufferBytes) {}
+
+ // This internal header starts with the public `Header`, and adds some data
+ // only necessary for local handling.
+ // This encapsulation is also necessary to perform placement-new in
+ // `Create()`.
+ struct InternalHeader {
+ explicit InternalHeader(Length aBufferBytes) : mHeader(aBufferBytes) {}
+
+ Header mHeader;
+ UniquePtr<ProfileBufferChunk> mNext;
+
+#ifdef DEBUG
+ enum class State {
+ Created, // Self-set. Just constructed, waiting for initial block tail.
+ InUse, // Ready to accept blocks.
+ Full, // Self-set. Blocks reach the end (or further).
+ Done, // Blocks won't be added anymore.
+ Recycled // Still full of data, but expecting an initial block tail.
+ };
+
+ State mState = State::Created;
+ // Transition table: (X=unexpected)
+ // Method \ State Created InUse Full Done Recycled
+ // ReserveInitialBlockAsTail InUse X X X InUse
+ // Reserve X InUse/Full X X X
+ // MarkDone X Done Done X X
+ // MarkRecycled Recycled X X Recycled Recycled
+ // destructor ok X X ok ok
+
+ const char* StateString() const {
+ switch (mState) {
+ case State::Created:
+ return "Created";
+ case State::InUse:
+ return "InUse";
+ case State::Full:
+ return "Full";
+ case State::Done:
+ return "Done";
+ case State::Recycled:
+ return "Recycled";
+ default:
+ return "?";
+ }
+ }
+#else // DEBUG
+ const char* StateString() const { return "(non-DEBUG)"; }
+#endif
+ };
+
+ InternalHeader mInternalHeader;
+
+ // KEEP THIS LAST!
+ // First byte of the buffer. Note that ProfileBufferChunk::Create allocates a
+ // bigger block, such that `mBuffer` is the first of `mBufferBytes` available
+ // bytes.
+ // The initialization is not strictly needed, because bytes should only be
+ // read after they have been written and `mOffsetPastLastBlock` has been
+ // updated. However:
+ // - Reviewbot complains that it's not initialized.
+ // - It's cheap to initialize one byte.
+ // - In the worst case (reading does happen), zero is not a valid entry size
+ // and should get caught in entry readers.
+ Byte mBuffer = '\0';
+};
+
+} // namespace mozilla
+
+#endif // ProfileBufferChunk_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
new file mode 100644
index 0000000000..e7f12bf21f
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManager_h
+#define ProfileBufferChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+#include "mozilla/ScopeExit.h"
+
+#include <functional>
+
+namespace mozilla {
+
+// Manages the ProfileBufferChunks for this process.
+// The main user of this class is the buffer that needs chunks to store its
+// data.
+// The main ProfileBufferChunkManager responsibilities are:
+// - It can create new chunks, they are called "unreleased".
+// - Later these chunks are returned here, and become "released".
+// - The manager is free to destroy or recycle the oldest released chunks
+// (usually to reclaim memory), and will inform the user through a provided
+// callback.
+// - The user may access still-alive released chunks.
+class ProfileBufferChunkManager {
+ public:
+ virtual ~ProfileBufferChunkManager()
+#ifdef DEBUG
+ {
+ MOZ_ASSERT(!mUser, "Still registered when being destroyed");
+ }
+#else
+ = default;
+#endif
+
+ // Expected maximum size needed to store one stack sample.
+ // Most ChunkManager sub-classes will require chunk sizes, this can serve as
+ // a minimum recommendation to hold most backtraces.
+ constexpr static ProfileBufferChunk::Length scExpectedMaximumStackSize =
+ 128 * 1024;
+
+ // Estimated maximum buffer size.
+ [[nodiscard]] virtual size_t MaxTotalSize() const = 0;
+
+ // Create or recycle a chunk right now. May return null in case of allocation
+ // failure.
+ // Note that the chunk-destroyed callback may be invoked during this call;
+ // user should be careful with reentrancy issues.
+ [[nodiscard]] virtual UniquePtr<ProfileBufferChunk> GetChunk() = 0;
+
+ // `aChunkReceiver` may be called with a new or recycled chunk, or nullptr.
+ // (See `FulfillChunkRequests()` regarding when the callback may happen.)
+ virtual void RequestChunk(
+ std::function<void(UniquePtr<ProfileBufferChunk>)>&& aChunkReceiver) = 0;
+
+ // This method may be invoked at any time on any thread (and not necessarily
+ // by the main user of this class), to do the work necessary to respond to a
+ // previous `RequestChunk()`.
+ // It is optional: If it is never called, or called too late, the user is
+ // responsible for directly calling `GetChunk()` when a chunk is really
+ // needed (or it should at least fail gracefully).
+ // The idea is to fulfill chunk requests on a separate thread, and most
+ // importantly outside of profiler calls, to avoid doing expensive memory
+ // allocations during these calls.
+ virtual void FulfillChunkRequests() = 0;
+
+ // One chunk is released by the user, the ProfileBufferChunkManager should
+ // keep it as long as possible (depending on local or global memory/time
+ // limits). Note that the chunk-destroyed callback may be invoked during this
+ // call; user should be careful with reentrancy issues.
+ virtual void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) = 0;
+
+ // `aChunkDestroyedCallback` will be called whenever the contents of a
+ // previously-released chunk are about to be destroyed or recycled.
+ // Note that it may be called during other functions above, or at other times
+ // from the same or other threads; user should be careful with reentrancy
+ // issues.
+ virtual void SetChunkDestroyedCallback(
+ std::function<void(const ProfileBufferChunk&)>&&
+ aChunkDestroyedCallback) = 0;
+
+ // Give away all released chunks that have not yet been destroyed.
+ [[nodiscard]] virtual UniquePtr<ProfileBufferChunk>
+ GetExtantReleasedChunks() = 0;
+
+ // Let a callback see all released chunks that have not yet been destroyed, if
+ // any. Return whatever the callback returns.
+ template <typename Callback>
+ [[nodiscard]] auto PeekExtantReleasedChunks(Callback&& aCallback) {
+ const ProfileBufferChunk* chunks = PeekExtantReleasedChunksAndLock();
+ auto unlock =
+ MakeScopeExit([&]() { UnlockAfterPeekExtantReleasedChunks(); });
+ return std::forward<Callback>(aCallback)(chunks);
+ }
+
+ // Chunks that were still unreleased will never be released.
+ virtual void ForgetUnreleasedChunks() = 0;
+
+ [[nodiscard]] virtual size_t SizeOfExcludingThis(
+ MallocSizeOf aMallocSizeOf) const = 0;
+ [[nodiscard]] virtual size_t SizeOfIncludingThis(
+ MallocSizeOf aMallocSizeOf) const = 0;
+
+ protected:
+ // Derived classes to implement `PeekExtantReleasedChunks` through these:
+ virtual const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() = 0;
+ virtual void UnlockAfterPeekExtantReleasedChunks() = 0;
+
+#ifdef DEBUG
+ public:
+ // DEBUG checks ensuring that this manager and its users avoid UAFs.
+ // Derived classes should assert that mUser is not null in their functions.
+
+ void RegisteredWith(const void* aUser) {
+ MOZ_ASSERT(!mUser);
+ MOZ_ASSERT(aUser);
+ mUser = aUser;
+ }
+
+ void DeregisteredFrom(const void* aUser) {
+ MOZ_ASSERT(mUser == aUser);
+ mUser = nullptr;
+ }
+
+ protected:
+ const void* mUser = nullptr;
+#endif // DEBUG
+};
+
+} // namespace mozilla
+
+#endif // ProfileBufferChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
new file mode 100644
index 0000000000..c91b38cbdb
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
@@ -0,0 +1,172 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManagerSingle_h
+#define ProfileBufferChunkManagerSingle_h
+
+#include "mozilla/ProfileBufferChunkManager.h"
+
+#ifdef DEBUG
+# include "mozilla/Atomics.h"
+#endif // DEBUG
+
+namespace mozilla {
+
+// Manages only one Chunk.
+// The first call to `Get`/`RequestChunk()` will retrieve the one chunk, and all
+// subsequent calls will return nullptr. That chunk may still be released, but
+// it will never be destroyed or recycled.
+// Unlike others, this manager may be `Reset()`, to allow another round of
+// small-data gathering.
+// The main use is with short-lived ProfileChunkedBuffers that collect little
+// data that can fit in one chunk, e.g., capturing one stack.
+// It is not thread-safe.
+class ProfileBufferChunkManagerSingle final : public ProfileBufferChunkManager {
+ public:
+ using Length = ProfileBufferChunk::Length;
+
+ // Use a preallocated chunk. (Accepting null to gracefully handle OOM.)
+ explicit ProfileBufferChunkManagerSingle(UniquePtr<ProfileBufferChunk> aChunk)
+ : mInitialChunk(std::move(aChunk)),
+ mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {
+ MOZ_ASSERT(!mInitialChunk || !mInitialChunk->GetNext(),
+ "Expected at most one chunk");
+ }
+
+ // ChunkMinBufferBytes: Minimum number of user-available bytes in the Chunk.
+ // Note that Chunks use a bit more memory for their header.
+ explicit ProfileBufferChunkManagerSingle(Length aChunkMinBufferBytes)
+ : mInitialChunk(ProfileBufferChunk::Create(aChunkMinBufferBytes)),
+ mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {}
+
+#ifdef DEBUG
+ ~ProfileBufferChunkManagerSingle() { MOZ_ASSERT(mVirtuallyLocked == false); }
+#endif // DEBUG
+
+ // Reset this manager, using the provided chunk (probably coming from the
+ // ProfileChunkedBuffer that just used it); if null, fallback on current or
+ // released chunk.
+ void Reset(UniquePtr<ProfileBufferChunk> aPossibleChunk) {
+ if (aPossibleChunk) {
+ mInitialChunk = std::move(aPossibleChunk);
+ mReleasedChunk = nullptr;
+ } else if (!mInitialChunk) {
+ MOZ_ASSERT(!!mReleasedChunk, "Can't reset properly!");
+ mInitialChunk = std::move(mReleasedChunk);
+ }
+
+ if (mInitialChunk) {
+ mInitialChunk->MarkRecycled();
+ mBufferBytes = mInitialChunk->BufferBytes();
+ } else {
+ mBufferBytes = 0;
+ }
+ }
+
+ [[nodiscard]] size_t MaxTotalSize() const final { return mBufferBytes; }
+
+ // One of `GetChunk` and `RequestChunk` will only work the very first time (if
+ // there's even a chunk).
+ [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ return std::move(mInitialChunk);
+ }
+
+ void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+ aChunkReceiver) final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ // Immediate: hand the one chunk (or nullptr) to the receiver right away.
+ std::move(aChunkReceiver)(GetChunk());
+ }
+
+ void FulfillChunkRequests() final {
+ // Nothing to do here: `RequestChunk()` already invoked its receiver.
+ }
+
+ void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ if (!aChunk) {
+ return;
+ }
+ MOZ_ASSERT(!mReleasedChunk, "Unexpected 2nd released chunk");
+ MOZ_ASSERT(!aChunk->GetNext(), "Only expected one released chunk");
+ mReleasedChunk = std::move(aChunk);
+ }
+
+ void SetChunkDestroyedCallback(
+ std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+ final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ // The chunk-destroyed callback will never actually be called, but we keep
+ // the callback here in case the caller expects it to live as long as this
+ // manager.
+ mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);
+ }
+
+ [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ return std::move(mReleasedChunk);
+ }
+
+ void ForgetUnreleasedChunks() final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ }
+
+ [[nodiscard]] size_t SizeOfExcludingThis(
+ MallocSizeOf aMallocSizeOf) const final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ size_t size = 0;
+ if (mInitialChunk) {
+ size += mInitialChunk->SizeOfIncludingThis(aMallocSizeOf);
+ }
+ if (mReleasedChunk) {
+ size += mReleasedChunk->SizeOfIncludingThis(aMallocSizeOf);
+ }
+ // Note: Missing size of std::function external resources (if any).
+ return size;
+ }
+
+ [[nodiscard]] size_t SizeOfIncludingThis(
+ MallocSizeOf aMallocSizeOf) const final {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+ }
+
+ protected:
+ // This manager is not thread-safe, so there's no actual locking needed.
+ const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final {
+ MOZ_ASSERT(mVirtuallyLocked.compareExchange(false, true));
+ MOZ_ASSERT(mUser, "Not registered yet");
+ return mReleasedChunk.get();
+ }
+ void UnlockAfterPeekExtantReleasedChunks() final {
+ MOZ_ASSERT(mVirtuallyLocked.compareExchange(true, false));
+ }
+
+ private:
+ // Initial chunk created with this manager, given away at first Get/Request.
+ UniquePtr<ProfileBufferChunk> mInitialChunk;
+
+ // Storage for the released chunk (which should probably not happen, as it
+ // means the chunk is full).
+ UniquePtr<ProfileBufferChunk> mReleasedChunk;
+
+ // Size of the one chunk we're managing. Stored here, because the chunk may
+ // be moved out and inaccessible from here.
+ Length mBufferBytes;
+
+ // The chunk-destroyed callback will never actually be called, but we keep it
+ // here in case the caller expects it to live as long as this manager.
+ std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback;
+
+#ifdef DEBUG
+ mutable Atomic<bool> mVirtuallyLocked{false};
+#endif // DEBUG
+};
+
+} // namespace mozilla
+
+#endif // ProfileBufferChunkManagerSingle_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h
new file mode 100644
index 0000000000..034279809d
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h
@@ -0,0 +1,444 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManagerWithLocalLimit_h
+#define ProfileBufferChunkManagerWithLocalLimit_h
+
+#include "BaseProfiler.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ProfileBufferChunkManager.h"
+#include "mozilla/ProfileBufferControlledChunkManager.h"
+#include "mozilla/mozalloc.h"
+
+#include <utility>
+
+namespace mozilla {
+
+// Manages the Chunks for this process in a thread-safe manner, with a maximum
+// size per process.
+//
+// "Unreleased" chunks are not owned here, only "released" chunks can be
+// destroyed or recycled when reaching the memory limit, so it is theoretically
+// possible to break that limit, if:
+// - The user of this class doesn't release their chunks, AND/OR
+// - The limit is too small (e.g., smaller than 2 or 3 chunks, which should be
+// the usual number of unreleased chunks in flight).
+// In this case, it just means that we will use more memory than allowed,
+// potentially risking OOMs. Hopefully this shouldn't happen in real code,
+// assuming that the user is doing the right thing and releasing chunks ASAP,
+// and that the memory limit is reasonably large.
+class ProfileBufferChunkManagerWithLocalLimit final
+ : public ProfileBufferChunkManager,
+ public ProfileBufferControlledChunkManager {
+ public:
+ using Length = ProfileBufferChunk::Length;
+
+ // MaxTotalBytes: Maximum number of bytes allocated in all local Chunks.
+ // ChunkMinBufferBytes: Minimum number of user-available bytes in each Chunk.
+ // Note that Chunks use a bit more memory for their header.
+ explicit ProfileBufferChunkManagerWithLocalLimit(size_t aMaxTotalBytes,
+ Length aChunkMinBufferBytes)
+ : mMaxTotalBytes(aMaxTotalBytes),
+ mChunkMinBufferBytes(aChunkMinBufferBytes) {}
+
+ ~ProfileBufferChunkManagerWithLocalLimit() {
+ if (mUpdateCallback) {
+ // Signal the end of this callback.
+ std::move(mUpdateCallback)(Update(nullptr));
+ }
+ }
+
+ [[nodiscard]] size_t MaxTotalSize() const final {
+ // `mMaxTotalBytes` is `const` so there is no need to lock the mutex.
+ return mMaxTotalBytes;
+ }
+
+ [[nodiscard]] size_t TotalSize() const { return mTotalBytes; }  // NOTE(review): read without `mMutex`; confirm a racy value is acceptable.
+
+ [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+ AUTO_PROFILER_STATS(Local_GetChunk);
+
+ ChunkAndUpdate chunkAndUpdate = [&]() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ return GetChunk(lock);
+ }();
+
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ if (mUpdateCallback && !chunkAndUpdate.second.IsNotUpdate()) {
+ mUpdateCallback(std::move(chunkAndUpdate.second));
+ }
+
+ return std::move(chunkAndUpdate.first);
+ }
+
+ void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+ aChunkReceiver) final {
+ AUTO_PROFILER_STATS(Local_RequestChunk);
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ if (mChunkReceiver) {
+ // We already have a chunk receiver, meaning a request is pending.
+ return;
+ }
+ // Store the chunk receiver. This indicates that a request is pending, and
+ // it will be handled in the next `FulfillChunkRequests()` call.
+ mChunkReceiver = std::move(aChunkReceiver);
+ }
+
+ void FulfillChunkRequests() final {
+ AUTO_PROFILER_STATS(Local_FulfillChunkRequests);
+ std::function<void(UniquePtr<ProfileBufferChunk>)> chunkReceiver;
+ ChunkAndUpdate chunkAndUpdate = [&]() -> ChunkAndUpdate {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ if (!mChunkReceiver) {
+ // No receiver means no pending request, we're done.
+ return {};
+ }
+ // Otherwise there is a request, extract the receiver to call below.
+ std::swap(chunkReceiver, mChunkReceiver);
+ MOZ_ASSERT(!mChunkReceiver, "mChunkReceiver should have been emptied");
+ // And allocate the requested chunk. This may fail, it's fine, we're
+ // letting the receiver know about it.
+ AUTO_PROFILER_STATS(Local_FulfillChunkRequests_GetChunk);
+ return GetChunk(lock);
+ }();
+
+ if (chunkReceiver) {
+ {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ if (mUpdateCallback && !chunkAndUpdate.second.IsNotUpdate()) {
+ mUpdateCallback(std::move(chunkAndUpdate.second));
+ }
+ }
+
+ // Invoke callback outside of lock, so that it can use other chunk manager
+ // functions if needed.
+ // Note that this means there could be a race, where another request
+ // happens now and even gets fulfilled before this one is! It should be
+ // rare, and shouldn't be a problem anyway, the user will still get their
+ // requested chunks, new/recycled chunks look the same so their order
+ // doesn't matter.
+ std::move(chunkReceiver)(std::move(chunkAndUpdate.first));
+ }
+ }
+
+ void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+ if (!aChunk) {
+ return;
+ }
+
+ MOZ_RELEASE_ASSERT(!aChunk->GetNext(), "ReleaseChunk only accepts 1 chunk");
+ MOZ_RELEASE_ASSERT(!aChunk->ChunkHeader().mDoneTimeStamp.IsNull(),
+ "Released chunk should have a 'Done' timestamp");
+
+ Update update = [&]() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ MOZ_ASSERT(mUser, "Not registered yet");
+ // Keep a pointer to the first newly-released chunk, so we can use it to
+ // prepare an update (after `aChunk` is moved-from).
+ const ProfileBufferChunk* const newlyReleasedChunk = aChunk.get();
+ // Transfer the chunk size from the unreleased bucket to the released one.
+ mUnreleasedBufferBytes -= aChunk->BufferBytes();
+ mReleasedBufferBytes += aChunk->BufferBytes();
+ if (!mReleasedChunks) {
+ // No other released chunks at the moment, we're starting the list.
+ MOZ_ASSERT(mReleasedBufferBytes == aChunk->BufferBytes());
+ mReleasedChunks = std::move(aChunk);
+ } else {
+ // Insert aChunk in mReleasedChunks to keep done-timestamp order.
+ const TimeStamp& releasedChunkDoneTimeStamp =
+ aChunk->ChunkHeader().mDoneTimeStamp;
+ if (releasedChunkDoneTimeStamp <
+ mReleasedChunks->ChunkHeader().mDoneTimeStamp) {
+ // aChunk is the oldest -> Insert at the beginning.
+ aChunk->SetLast(std::move(mReleasedChunks));
+ mReleasedChunks = std::move(aChunk);
+ } else {
+ // Go through the already-released chunk list, and insert aChunk
+ // before the first younger released chunk, or at the end.
+ ProfileBufferChunk* chunk = mReleasedChunks.get();
+ for (;;) {
+ ProfileBufferChunk* const nextChunk = chunk->GetNext();
+ if (!nextChunk || releasedChunkDoneTimeStamp <
+ nextChunk->ChunkHeader().mDoneTimeStamp) {
+ // Either we're at the last released chunk, or the next released
+ // chunk is younger -> Insert right after this released chunk.
+ chunk->InsertNext(std::move(aChunk));
+ break;
+ }
+ chunk = nextChunk;
+ }
+ }
+ }
+
+ return Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+ mReleasedChunks.get(), newlyReleasedChunk);
+ }();
+
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ if (mUpdateCallback && !update.IsNotUpdate()) {
+ mUpdateCallback(std::move(update));
+ }
+ }
+
+ void SetChunkDestroyedCallback(
+ std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+ final {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ MOZ_ASSERT(mUser, "Not registered yet");
+ mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);
+ }
+
+ [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+ UniquePtr<ProfileBufferChunk> chunks;
+ size_t unreleasedBufferBytes = [&]() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ MOZ_ASSERT(mUser, "Not registered yet");
+ mReleasedBufferBytes = 0;
+ chunks = std::move(mReleasedChunks);
+ return mUnreleasedBufferBytes;
+ }();
+
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ if (mUpdateCallback) {
+ mUpdateCallback(Update(unreleasedBufferBytes, 0, nullptr, nullptr));
+ }
+
+ return chunks;
+ }
+
+ void ForgetUnreleasedChunks() final {
+ Update update = [&]() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ MOZ_ASSERT(mUser, "Not registered yet");
+ mUnreleasedBufferBytes = 0;
+ return Update(0, mReleasedBufferBytes, mReleasedChunks.get(), nullptr);
+ }();
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ if (mUpdateCallback) {
+ mUpdateCallback(std::move(update));
+ }
+ }
+
+ [[nodiscard]] size_t SizeOfExcludingThis(
+ MallocSizeOf aMallocSizeOf) const final {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ return SizeOfExcludingThis(aMallocSizeOf, lock);
+ }
+
+ [[nodiscard]] size_t SizeOfIncludingThis(
+ MallocSizeOf aMallocSizeOf) const final {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ MOZ_ASSERT(mUser, "Not registered yet");
+ return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf, lock);
+ }
+
+ void SetUpdateCallback(UpdateCallback&& aUpdateCallback) final {
+ {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ if (mUpdateCallback) {
+ // Signal the end of the previous callback.
+ std::move(mUpdateCallback)(Update(nullptr));
+ mUpdateCallback = nullptr;
+ }
+ }
+
+ if (aUpdateCallback) {
+ Update initialUpdate = [&]() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ return Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+ mReleasedChunks.get(), nullptr);
+ }();
+
+ baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+ MOZ_ASSERT(!mUpdateCallback, "Only one update callback allowed");
+ mUpdateCallback = std::move(aUpdateCallback);
+ mUpdateCallback(std::move(initialUpdate));
+ }
+ }
+
+ void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) final {
+ MOZ_ASSERT(!aDoneTimeStamp.IsNull());
+ baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+ for (;;) {
+ if (!mReleasedChunks) {
+ // We don't own any released chunks (anymore), we're done.
+ break;
+ }
+ if (mReleasedChunks->ChunkHeader().mDoneTimeStamp > aDoneTimeStamp) {
+ // The current chunk is strictly after the given timestamp, we're done.
+ break;
+ }
+ // We've found a chunk at or before the timestamp, discard it.
+ DiscardOldestReleasedChunk(lock);
+ }
+ }
+
+ protected:
+ const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final
+ MOZ_CAPABILITY_ACQUIRE(mMutex) {
+ mMutex.Lock();
+ MOZ_ASSERT(mUser, "Not registered yet");
+ return mReleasedChunks.get();
+ }
+ void UnlockAfterPeekExtantReleasedChunks() final
+ MOZ_CAPABILITY_RELEASE(mMutex) {
+ mMutex.Unlock();
+ }
+
+ private:
+ size_t MaybeRecycleChunkAndGetDeallocatedSize(
+ UniquePtr<ProfileBufferChunk>&& chunk,
+ const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+ // Try to recycle big-enough chunks. (All chunks should have the same size,
+ // but it's a cheap test and may allow future adjustments based on actual
+ // data rate.)
+ if (chunk->BufferBytes() >= mChunkMinBufferBytes) {
+ // Keep at most two recycled chunks; recycling deallocates nothing (0).
+ if (!mRecycledChunks) {
+ mRecycledChunks = std::move(chunk);
+ return 0;
+ } else if (!mRecycledChunks->GetNext()) {
+ mRecycledChunks->InsertNext(std::move(chunk));
+ return 0;
+ }
+ }
+ return moz_malloc_usable_size(chunk.get());  // Not recycled (`chunk` not moved-from; caller's UniquePtr frees it).
+ }
+
+ UniquePtr<ProfileBufferChunk> TakeRecycledChunk(
+ const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+ UniquePtr<ProfileBufferChunk> recycled;
+ if (mRecycledChunks) {
+ recycled = std::exchange(mRecycledChunks, mRecycledChunks->ReleaseNext());
+ recycled->MarkRecycled();
+ }
+ return recycled;
+ }
+
+ void DiscardOldestReleasedChunk(
+ const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+ MOZ_ASSERT(!!mReleasedChunks);
+ UniquePtr<ProfileBufferChunk> oldest =
+ std::exchange(mReleasedChunks, mReleasedChunks->ReleaseNext());
+ mReleasedBufferBytes -= oldest->BufferBytes();
+ if (mChunkDestroyedCallback) {
+ // Inform the user that we're going to destroy this chunk.
+ mChunkDestroyedCallback(*oldest);
+ }
+
+ mTotalBytes -=
+ MaybeRecycleChunkAndGetDeallocatedSize(std::move(oldest), aLock);
+ }
+
+ using ChunkAndUpdate = std::pair<UniquePtr<ProfileBufferChunk>, Update>;
+ [[nodiscard]] ChunkAndUpdate GetChunk(
+ const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ // After this function, the total memory consumption will be the sum of:
+ // - Bytes from released (i.e., full) chunks,
+ // - Bytes from unreleased (still in use) chunks,
+ // - Bytes from the chunk we want to create/recycle. (Note that we don't
+ // count the extra bytes of chunk header, and of extra allocation ability,
+ // for the new chunk, as it's assumed to be negligible compared to the
+ // total memory limit.)
+ // If this total reaches or exceeds the local limit, we'll want to destroy
+ // the oldest released chunks until we're under the limit; if any, we may
+ // recycle one of them to avoid a deallocation followed by an allocation.
+ while (mReleasedBufferBytes + mUnreleasedBufferBytes +
+ mChunkMinBufferBytes >=
+ mMaxTotalBytes &&
+ !!mReleasedChunks) {
+ // We have reached the local limit, discard the oldest released chunk.
+ DiscardOldestReleasedChunk(aLock);
+ }
+
+ // Extract the recycled chunk, if any.
+ ChunkAndUpdate chunkAndUpdate{TakeRecycledChunk(aLock), Update()};
+ UniquePtr<ProfileBufferChunk>& chunk = chunkAndUpdate.first;
+
+ if (!chunk) {
+ // No recycled chunk -> Create a chunk now. (This could still fail.)
+ chunk = ProfileBufferChunk::Create(mChunkMinBufferBytes);
+ mTotalBytes += moz_malloc_usable_size(chunk.get());
+ }
+
+ if (chunk) {
+ // We do have a chunk (recycled or new), record its size as "unreleased".
+ mUnreleasedBufferBytes += chunk->BufferBytes();
+
+ chunkAndUpdate.second =
+ Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+ mReleasedChunks.get(), nullptr);
+ }
+
+ return chunkAndUpdate;
+ }
+
+ [[nodiscard]] size_t SizeOfExcludingThis(
+ MallocSizeOf aMallocSizeOf,
+ const baseprofiler::detail::BaseProfilerAutoLock&) const {
+ MOZ_ASSERT(mUser, "Not registered yet");
+ size_t size = 0;
+ if (mReleasedChunks) {
+ size += mReleasedChunks->SizeOfIncludingThis(aMallocSizeOf);
+ }
+ if (mRecycledChunks) {
+ size += mRecycledChunks->SizeOfIncludingThis(aMallocSizeOf);
+ }
+ // Note: Missing size of std::function external resources (if any).
+ return size;
+ }
+
+ // Maximum number of bytes that should be used by all unreleased and released
+ // chunks. Note that only released chunks can be destroyed here, so it is the
+ // responsibility of the user to properly release their chunks when possible.
+ const size_t mMaxTotalBytes;
+
+ // Minimum number of bytes that new chunks should be able to store.
+ // Used when calling `ProfileBufferChunk::Create()`.
+ const Length mChunkMinBufferBytes;
+
+ // Mutex guarding the following members.
+ mutable baseprofiler::detail::BaseProfilerMutex mMutex;
+
+ // Number of bytes currently held in chunks that have been given away (through
+ // `GetChunk` or `RequestChunk`) and not released yet.
+ size_t mUnreleasedBufferBytes = 0;
+
+ // Number of bytes currently held in chunks that have been released and stored
+ // in `mReleasedChunks` below.
+ size_t mReleasedBufferBytes = 0;
+
+ // Total allocated size (used to subtract it from memory counters).
+ size_t mTotalBytes = 0;
+
+ // List of all released chunks. The oldest one should be at the start of the
+ // list, and may be destroyed or recycled when the memory limit is reached.
+ UniquePtr<ProfileBufferChunk> mReleasedChunks;
+
+ // This may hold chunks that were released then slated for destruction, they
+ // will be reused next time an allocation would have been needed.
+ UniquePtr<ProfileBufferChunk> mRecycledChunks;
+
+ // Optional callback used to notify the user when a chunk is about to be
+ // destroyed or recycled. (The data content is always destroyed, but the chunk
+ // container may be reused.)
+ std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback;
+
+ // Callback set from `RequestChunk()`, until it is serviced in
+ // `FulfillChunkRequests()`. There can only be one request in flight.
+ std::function<void(UniquePtr<ProfileBufferChunk>)> mChunkReceiver;
+
+ // Separate mutex guarding mUpdateCallback, so that it may be invoked outside
+ // of the main buffer `mMutex`.
+ mutable baseprofiler::detail::BaseProfilerMutex mUpdateCallbackMutex;
+
+ UpdateCallback mUpdateCallback;
+};
+
+} // namespace mozilla
+
+#endif // ProfileBufferChunkManagerWithLocalLimit_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h
new file mode 100644
index 0000000000..45b39b163c
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h
@@ -0,0 +1,203 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferControlledChunkManager_h
+#define ProfileBufferControlledChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+
+#include <functional>
+#include <vector>
+
+namespace mozilla {
+
+ // A "Controlled" chunk manager will provide updates about chunks that it
+ // creates, releases, and destroys; and it can destroy released chunks as
+ // requested.
+ class ProfileBufferControlledChunkManager {
+ public:
+ using Length = ProfileBufferChunk::Length;
+
+ virtual ~ProfileBufferControlledChunkManager() = default;
+
+ // Minimum amount of chunk metadata to be transferred between processes.
+ struct ChunkMetadata {
+ // Timestamp when chunk was marked "done", which is used to:
+ // - determine its age, so the oldest one will be destroyed first,
+ // - uniquely identify this chunk in this process. (The parent process is
+ // responsible for associating this timestamp to its process id.)
+ TimeStamp mDoneTimeStamp;
+ // Size of this chunk's buffer.
+ Length mBufferBytes;
+
+ ChunkMetadata(TimeStamp aDoneTimeStamp, Length aBufferBytes)
+ : mDoneTimeStamp(aDoneTimeStamp), mBufferBytes(aBufferBytes) {}
+ };
+
+ // Class collecting all information necessary to describe updates that
+ // happened in a chunk manager.
+ // An update can be folded into a previous update.
+ class Update {
+ public:
+ // Construct a "not-an-Update" object, which should only be used after a
+ // real update is folded into it.
+ Update() = default;
+
+ // Construct a "final" Update, which marks the end of all updates from a
+ // chunk manager.
+ explicit Update(decltype(nullptr)) : mUnreleasedBytes(FINAL) {}
+
+ // Construct an Update from the given data and released chunks.
+ // The chunk pointers may be null, and it doesn't matter if
+ // `aNewlyReleasedChunks` is already linked to `aExistingReleasedChunks` or
+ // not. `aUnreleasedBytes` must not be one of the reserved sentinel values.
+ Update(size_t aUnreleasedBytes, size_t aReleasedBytes,
+ const ProfileBufferChunk* aExistingReleasedChunks,
+ const ProfileBufferChunk* aNewlyReleasedChunks)
+ : mUnreleasedBytes(aUnreleasedBytes),
+ mReleasedBytes(aReleasedBytes),
+ mOldestDoneTimeStamp(
+ aExistingReleasedChunks
+ ? aExistingReleasedChunks->ChunkHeader().mDoneTimeStamp
+ : TimeStamp{}) {
+ MOZ_RELEASE_ASSERT(
+ !IsNotUpdate(),
+ "Empty update should only be constructed with default constructor");
+ MOZ_RELEASE_ASSERT(
+ !IsFinal(),
+ "Final update should only be constructed with nullptr constructor");
+ for (const ProfileBufferChunk* chunk = aNewlyReleasedChunks; chunk;
+ chunk = chunk->GetNext()) {
+ mNewlyReleasedChunks.emplace_back(ChunkMetadata{
+ chunk->ChunkHeader().mDoneTimeStamp, chunk->BufferBytes()});
+ }
+ }
+
+ // Construct an Update from raw data.
+ // This may be used to re-construct an Update that was previously
+ // serialized. Unlike the constructor above, nothing is asserted here.
+ Update(size_t aUnreleasedBytes, size_t aReleasedBytes,
+ TimeStamp aOldestDoneTimeStamp,
+ std::vector<ChunkMetadata>&& aNewlyReleasedChunks)
+ : mUnreleasedBytes(aUnreleasedBytes),
+ mReleasedBytes(aReleasedBytes),
+ mOldestDoneTimeStamp(aOldestDoneTimeStamp),
+ mNewlyReleasedChunks(std::move(aNewlyReleasedChunks)) {}
+
+ // Clear the Update completely and return it to a "not-an-Update" state.
+ void Clear() {
+ mUnreleasedBytes = NO_UPDATE;
+ mReleasedBytes = 0;
+ mOldestDoneTimeStamp = TimeStamp{};
+ mNewlyReleasedChunks.clear();
+ }
+
+ bool IsNotUpdate() const { return mUnreleasedBytes == NO_UPDATE; }
+
+ bool IsFinal() const { return mUnreleasedBytes == FINAL; }
+
+ size_t UnreleasedBytes() const {
+ MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+ "Cannot access UnreleasedBytes from empty update");
+ MOZ_RELEASE_ASSERT(!IsFinal(),
+ "Cannot access UnreleasedBytes from final update");
+ return mUnreleasedBytes;
+ }
+
+ size_t ReleasedBytes() const {
+ MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+ "Cannot access ReleasedBytes from empty update");
+ MOZ_RELEASE_ASSERT(!IsFinal(),
+ "Cannot access ReleasedBytes from final update");
+ return mReleasedBytes;
+ }
+
+ TimeStamp OldestDoneTimeStamp() const {
+ MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+ "Cannot access OldestDoneTimeStamp from empty update");
+ MOZ_RELEASE_ASSERT(!IsFinal(),
+ "Cannot access OldestDoneTimeStamp from final update");
+ return mOldestDoneTimeStamp;
+ }
+
+ const std::vector<ChunkMetadata>& NewlyReleasedChunksRef() const {
+ MOZ_RELEASE_ASSERT(
+ !IsNotUpdate(),
+ "Cannot access NewlyReleasedChunksRef from empty update");
+ MOZ_RELEASE_ASSERT(
+ !IsFinal(), "Cannot access NewlyReleasedChunksRef from final update");
+ return mNewlyReleasedChunks;
+ }
+
+ // Fold a later update into this one (latest counts win, stale chunks go).
+ void Fold(Update&& aNewUpdate) {
+ MOZ_ASSERT(
+ !IsFinal() || aNewUpdate.IsFinal(),
+ "There shouldn't be another non-final update after the final update");
+
+ if (IsNotUpdate() || aNewUpdate.IsFinal()) {
+ // We were empty, or the new update is the final update, we just switch
+ // to that new update.
+ *this = std::move(aNewUpdate);
+ return;
+ }
+
+ mUnreleasedBytes = aNewUpdate.mUnreleasedBytes;
+ mReleasedBytes = aNewUpdate.mReleasedBytes;
+ if (!aNewUpdate.mOldestDoneTimeStamp.IsNull()) {
+ MOZ_ASSERT(mOldestDoneTimeStamp.IsNull() ||
+ mOldestDoneTimeStamp <= aNewUpdate.mOldestDoneTimeStamp);
+ mOldestDoneTimeStamp = aNewUpdate.mOldestDoneTimeStamp;
+ // Find the end of the leading run of older chunks, then erase it at once.
+ auto firstKept = mNewlyReleasedChunks.begin();
+ while (firstKept != mNewlyReleasedChunks.end() &&
+ firstKept->mDoneTimeStamp < mOldestDoneTimeStamp) {
+ ++firstKept;
+ }
+ mNewlyReleasedChunks.erase(mNewlyReleasedChunks.begin(), firstKept);
+ }
+ // Append the new chunks. (No exact `reserve()` first: range-insert already
+ // sizes the storage, and exact reserves would defeat geometric growth.)
+ mNewlyReleasedChunks.insert(mNewlyReleasedChunks.end(),
+ aNewUpdate.mNewlyReleasedChunks.begin(),
+ aNewUpdate.mNewlyReleasedChunks.end());
+ }
+
+ private:
+ static const size_t NO_UPDATE = size_t(-1);  // Sentinel: "not-an-Update".
+ static const size_t FINAL = size_t(-2);  // Sentinel: "final" Update.
+
+ size_t mUnreleasedBytes = NO_UPDATE;
+ size_t mReleasedBytes = 0;
+ TimeStamp mOldestDoneTimeStamp;
+ std::vector<ChunkMetadata> mNewlyReleasedChunks;
+ };
+
+ using UpdateCallback = std::function<void(Update&&)>;
+
+ // This *may* be set (or reset) by an object that needs to know about all
+ // chunk updates that happen in this manager. The main use will be to
+ // coordinate the global memory usage of Firefox.
+ // If a non-empty callback is given, it will be immediately invoked with the
+ // current state.
+ // When the callback is about to be destroyed (by overwriting it here, or in
+ // the class destructor), it will be invoked one last time with an empty
+ // update.
+ // Note that the callback (even the first current-state callback) will be
+ // invoked from inside a locked scope, so it should *not* call other functions
+ // of the chunk manager. A side benefit of this locking is that it guarantees
+ // that no two invocations can overlap.
+ virtual void SetUpdateCallback(UpdateCallback&& aUpdateCallback) = 0;
+
+ // This is a request to destroy all chunks at or before the given timestamp.
+ // This timestamp should be one that was given in a previous UpdateCallback
+ // call. Obviously, only released chunks can be destroyed.
+ virtual void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) = 0;
+ };
+
+} // namespace mozilla
+
+#endif // ProfileBufferControlledChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h
new file mode 100644
index 0000000000..bffe5cfac5
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h
@@ -0,0 +1,104 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntryKinds_h
+#define ProfileBufferEntryKinds_h
+
+#include "mozilla/BaseProfilerUtils.h"
+
+#include <cstdint>
+
+namespace mozilla {
+
+ // This is equal to sizeof(double), which is the largest non-char variant in
+ // |u|. It is also the SIZE given to the DynamicStringFragment kind below.
+ static constexpr size_t ProfileBufferEntryNumChars = 8;
+
+ // NOTE! If you add entries, you need to verify if they need to be added to the
+ // switch statement in DuplicateLastSample!
+ // This will evaluate MACRO once per entry kind, as MACRO(KIND, TYPE, SIZE).
+ #define FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(MACRO) \
+ MACRO(CategoryPair, int, sizeof(int)) \
+ MACRO(CollectionStart, double, sizeof(double)) \
+ MACRO(CollectionEnd, double, sizeof(double)) \
+ MACRO(Label, const char*, sizeof(const char*)) \
+ MACRO(FrameFlags, uint64_t, sizeof(uint64_t)) \
+ MACRO(DynamicStringFragment, char*, ProfileBufferEntryNumChars) \
+ MACRO(JitReturnAddr, void*, sizeof(void*)) \
+ MACRO(InnerWindowID, uint64_t, sizeof(uint64_t)) \
+ MACRO(LineNumber, int, sizeof(int)) \
+ MACRO(ColumnNumber, int, sizeof(int)) \
+ MACRO(NativeLeafAddr, void*, sizeof(void*)) \
+ MACRO(Pause, double, sizeof(double)) \
+ MACRO(Resume, double, sizeof(double)) \
+ MACRO(PauseSampling, double, sizeof(double)) \
+ MACRO(ResumeSampling, double, sizeof(double)) \
+ MACRO(Responsiveness, double, sizeof(double)) \
+ MACRO(ThreadId, ::mozilla::baseprofiler::BaseProfilerThreadId, \
+ sizeof(::mozilla::baseprofiler::BaseProfilerThreadId)) \
+ MACRO(Time, double, sizeof(double)) \
+ MACRO(TimeBeforeCompactStack, double, sizeof(double)) \
+ MACRO(TimeBeforeSameSample, double, sizeof(double)) \
+ MACRO(CounterId, void*, sizeof(void*)) \
+ MACRO(Number, uint64_t, sizeof(uint64_t)) \
+ MACRO(Count, int64_t, sizeof(int64_t)) \
+ MACRO(ProfilerOverheadTime, double, sizeof(double)) \
+ MACRO(ProfilerOverheadDuration, double, sizeof(double))
+
+ // The `Kind` is a single byte identifying the type of data that is actually
+ // stored in a `ProfileBufferEntry`, as per the list in
+ // `FOR_EACH_PROFILE_BUFFER_ENTRY_KIND`.
+ //
+ // This byte is also used to identify entries in ProfileChunkedBuffer blocks,
+ // for both "legacy" entries that do contain a `ProfileBufferEntry`, and for
+ // new types of entries that may carry more data of different types.
+ // TODO: Eventually each type of "legacy" entry should be replaced with newer,
+ // more efficient kinds of entries (e.g., stack frames could be stored in one
+ // bigger entry, instead of multiple `ProfileBufferEntry`s); then we could
+ // discard `ProfileBufferEntry` and move this enum to a more appropriate spot.
+ enum class ProfileBufferEntryKind : uint8_t {
+ INVALID = 0,
+ #define KIND(KIND, TYPE, SIZE) KIND,
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(KIND)
+ #undef KIND
+
+ // Any value under `LEGACY_LIMIT` represents a `ProfileBufferEntry`.
+ LEGACY_LIMIT,
+
+ // Any value starting here does *not* represent a `ProfileBufferEntry` and
+ // requires separate decoding and handling.
+
+ // Markers and their data. (Same numeric value as `LEGACY_LIMIT`.)
+ Marker = LEGACY_LIMIT,
+
+ // Entry with "running times", such as CPU usage measurements.
+ // Optional between TimeBeforeX and X.
+ RunningTimes,
+
+ // Optional between TimeBeforeX and X.
+ UnresponsiveDurationMs,
+
+ // Collection of legacy stack entries, must follow a ThreadId and
+ // TimeBeforeCompactStack (which are not included in the CompactStack;
+ // TimeBeforeCompactStack is equivalent to Time, but indicates that a
+ // CompactStack follows shortly afterwards).
+ CompactStack,
+
+ // Indicates that this sample is identical to the previous one, must follow a
+ // ThreadId and TimeBeforeSameSample.
+ SameSample,
+
+ MODERN_LIMIT  // Sentinel: one past the last kind declared above.
+ };
+
+ enum class MarkerPayloadType : uint8_t {  // Underlying type is one byte.
+ Cpp,   // NOTE(review): presumably a payload written by C++ code; confirm.
+ Rust,  // NOTE(review): presumably a payload written by Rust code; confirm.
+ };
+
+} // namespace mozilla
+
+#endif // ProfileBufferEntryKinds_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h
new file mode 100644
index 0000000000..7ba19b070d
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h
@@ -0,0 +1,1184 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntrySerialization_h
+#define ProfileBufferEntrySerialization_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/leb128iterator.h"
+#include "mozilla/Likely.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Unused.h"
+#include "mozilla/Variant.h"
+
+#include <string>
+#include <tuple>
+
+namespace mozilla {
+
+class ProfileBufferEntryWriter;
+
+ // Iterator-like class used to read from an entry.
+ // An entry may be split in two memory segments (e.g., the ends of a ring
+ // buffer, or two chunks of a chunked buffer); it doesn't deal with this
+ // underlying buffer, but only with one or two spans pointing at the space
+ // where the entry lives.
+ class ProfileBufferEntryReader {
+ public:
+ using Byte = uint8_t;
+ using Length = uint32_t;
+
+ using SpanOfConstBytes = Span<const Byte>;
+
+ // Class to be specialized for types to be read from a profile buffer entry.
+ // See common specializations at the bottom of this header.
+ // The following static functions must be provided:
+ // static void ReadInto(EntryReader& aER, T& aT)
+ // {
+ // /* Call `aER.ReadX(...)` function to deserialize into aT, be sure to
+ // read exactly `Bytes(aT)`! */
+ // }
+ // static T Read(EntryReader& aER) {
+ // /* Call `aER.ReadX(...)` function to deserialize and return a `T`, be
+ // sure to read exactly `Bytes(returned value)`! */
+ // }
+ template <typename T>
+ struct Deserializer;
+
+ ProfileBufferEntryReader() = default;
+
+ // Reader over one Span.
+ ProfileBufferEntryReader(SpanOfConstBytes aSpan,
+ ProfileBufferBlockIndex aCurrentBlockIndex,
+ ProfileBufferBlockIndex aNextBlockIndex)
+ : mCurrentSpan(aSpan),
+ mNextSpanOrEmpty(aSpan.Last(0)),
+ mCurrentBlockIndex(aCurrentBlockIndex),
+ mNextBlockIndex(aNextBlockIndex) {
+ // 2nd internal Span points at the end of the 1st internal Span, to enforce
+ // invariants.
+ CheckInvariants();
+ }
+
+ // Reader over two Spans, the second one must not be empty.
+ ProfileBufferEntryReader(SpanOfConstBytes aSpanHead,
+ SpanOfConstBytes aSpanTail,
+ ProfileBufferBlockIndex aCurrentBlockIndex,
+ ProfileBufferBlockIndex aNextBlockIndex)
+ : mCurrentSpan(aSpanHead),
+ mNextSpanOrEmpty(aSpanTail),
+ mCurrentBlockIndex(aCurrentBlockIndex),
+ mNextBlockIndex(aNextBlockIndex) {
+ MOZ_RELEASE_ASSERT(!mNextSpanOrEmpty.IsEmpty());
+ if (MOZ_UNLIKELY(mCurrentSpan.IsEmpty())) {
+ // First span is already empty, skip it.
+ mCurrentSpan = mNextSpanOrEmpty;
+ mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+ }
+ CheckInvariants();
+ }
+
+ // Allow copying, which is needed when used as an iterator in some std
+ // functions (e.g., string assignment), and to occasionally backtrack.
+ // Be aware that the main profile buffer APIs give a reference to an entry
+ // reader, and expect that reader to advance to the end of the entry, so don't
+ // just advance copies!
+ ProfileBufferEntryReader(const ProfileBufferEntryReader&) = default;
+ ProfileBufferEntryReader& operator=(const ProfileBufferEntryReader&) =
+ default;
+
+ // Don't =default moving, as it doesn't bring any benefit in this class.
+
+ [[nodiscard]] Length RemainingBytes() const {
+ return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes();
+ }
+
+ void SetRemainingBytes(Length aBytes) {  // Keep only the next aBytes bytes.
+ MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+ if (aBytes <= mCurrentSpan.LengthBytes()) {
+ mCurrentSpan = mCurrentSpan.First(aBytes);
+ mNextSpanOrEmpty = mCurrentSpan.Last(0);
+ } else {
+ mNextSpanOrEmpty =
+ mNextSpanOrEmpty.First(aBytes - mCurrentSpan.LengthBytes());
+ }
+ }
+
+ [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+ return mCurrentBlockIndex;
+ }
+
+ [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+ return mNextBlockIndex;
+ }
+
+ // Create a reader of size zero, pointing at aOffset past the current position
+ // of this Reader, so it can be used as end iterator.
+ [[nodiscard]] ProfileBufferEntryReader EmptyIteratorAtOffset(
+ Length aOffset) const {
+ MOZ_RELEASE_ASSERT(aOffset <= RemainingBytes());
+ if (MOZ_LIKELY(aOffset < mCurrentSpan.LengthBytes())) {
+ // aOffset is before the end of mCurrentSpan.
+ return ProfileBufferEntryReader(mCurrentSpan.Subspan(aOffset, 0),
+ mCurrentBlockIndex, mNextBlockIndex);
+ }
+ // aOffset is right at the end of mCurrentSpan, or inside mNextSpanOrEmpty.
+ return ProfileBufferEntryReader(
+ mNextSpanOrEmpty.Subspan(aOffset - mCurrentSpan.LengthBytes(), 0),
+ mCurrentBlockIndex, mNextBlockIndex);
+ }
+
+ // Be like a limited input iterator, with only `*`, prefix-`++`, `==`, `!=`.
+ // These definitions are expected by std functions, to recognize this as an
+ // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+ using difference_type = std::make_signed_t<Length>;
+ using value_type = Byte;
+ using pointer = const Byte*;
+ using reference = const Byte&;
+ using iterator_category = std::input_iterator_tag;
+
+ [[nodiscard]] const Byte& operator*() {
+ // Assume the caller will read from the returned reference (and not just
+ // take the address).
+ MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1);
+ return *(mCurrentSpan.Elements());
+ }
+
+ ProfileBufferEntryReader& operator++() {
+ MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1);
+ if (MOZ_LIKELY(mCurrentSpan.LengthBytes() > 1)) {
+ // More than 1 byte left in mCurrentSpan, just eat it.
+ mCurrentSpan = mCurrentSpan.From(1);
+ } else {
+ // mCurrentSpan will be empty, move mNextSpanOrEmpty to mCurrentSpan.
+ mCurrentSpan = mNextSpanOrEmpty;
+ mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+ }
+ CheckInvariants();
+ return *this;
+ }
+
+ ProfileBufferEntryReader& operator+=(Length aBytes) {
+ MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+ if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+ // All bytes are in mCurrentSpan.
+ // Update mCurrentSpan past the read bytes.
+ mCurrentSpan = mCurrentSpan.From(aBytes);
+ if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) {
+ // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into
+ // mCurrentSpan.
+ mCurrentSpan = mNextSpanOrEmpty;
+ mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+ }
+ } else {
+ // mCurrentSpan does not hold enough bytes.
+ // This should only happen at most once: Only for double spans, and when
+ // data crosses the gap.
+ const Length tail =
+ aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+ // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+ // will go back to the true case above.
+ mCurrentSpan = mNextSpanOrEmpty.From(tail);
+ mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+ }
+ CheckInvariants();
+ return *this;
+ }
+
+ [[nodiscard]] bool operator==(const ProfileBufferEntryReader& aOther) const {
+ return mCurrentSpan.Elements() == aOther.mCurrentSpan.Elements();
+ }  // Note: only the read position is compared, not the remaining length.
+ [[nodiscard]] bool operator!=(const ProfileBufferEntryReader& aOther) const {
+ return mCurrentSpan.Elements() != aOther.mCurrentSpan.Elements();
+ }
+
+ // Read an unsigned LEB128 number and move iterator ahead.
+ template <typename T>
+ [[nodiscard]] T ReadULEB128() {
+ return ::mozilla::ReadULEB128<T>(*this);
+ }
+
+ // This struct points at a number of bytes through either one span, or two
+ // separate spans (in the rare cases when it is split between two chunks).
+ // So the possibilities are:
+ // - Totally empty: { [] [] }
+ // - First span is not empty: { [content] [] } (Most common case.)
+ // - Both spans are not empty: { [cont] [ent] }
+ // But something like { [] [content] } is not possible.
+ //
+ // Recommended usage patterns:
+ // - Call a utility function like `CopyBytesTo` if you always need to copy the
+ // data to an outside buffer, e.g., to deserialize an aligned object.
+ // - Access both spans one after the other; Note that the second one may be
+ // empty; and the first could be empty as well if there is no data at all.
+ // - Check if the second span is empty, in which case you only need to read
+ // the first one; and since it's part of a chunk, it may be directly passed
+ // as an unaligned pointer or reference, thereby saving one copy. But
+ // remember to always handle the double-span case as well.
+ //
+ // Reminder: An empty span still has a non-null pointer, so it's safe to use
+ // with functions like memcpy.
+ struct DoubleSpanOfConstBytes {
+ SpanOfConstBytes mFirstOrOnly;
+ SpanOfConstBytes mSecondOrEmpty;
+
+ void CheckInvariants() const {
+ MOZ_ASSERT(mFirstOrOnly.IsEmpty() ? mSecondOrEmpty.IsEmpty() : true,
+ "mSecondOrEmpty should not be the only span to contain data");
+ }
+
+ DoubleSpanOfConstBytes() : mFirstOrOnly(), mSecondOrEmpty() {
+ CheckInvariants();
+ }
+
+ DoubleSpanOfConstBytes(const Byte* aOnlyPointer, size_t aOnlyLength)
+ : mFirstOrOnly(aOnlyPointer, aOnlyLength), mSecondOrEmpty() {
+ CheckInvariants();
+ }
+
+ DoubleSpanOfConstBytes(const Byte* aFirstPointer, size_t aFirstLength,
+ const Byte* aSecondPointer, size_t aSecondLength)
+ : mFirstOrOnly(aFirstPointer, aFirstLength),
+ mSecondOrEmpty(aSecondPointer, aSecondLength) {
+ CheckInvariants();
+ }
+
+ // Is there no data at all?
+ [[nodiscard]] bool IsEmpty() const {
+ // We only need to check the first span, because if it's empty, the second
+ // one must be empty as well.
+ return mFirstOrOnly.IsEmpty();
+ }
+
+ // Total length (in bytes) pointed at by both spans.
+ [[nodiscard]] size_t LengthBytes() const {
+ return mFirstOrOnly.LengthBytes() + mSecondOrEmpty.LengthBytes();
+ }
+
+ // Utility functions to copy all `LengthBytes()` to a given buffer.
+ void CopyBytesTo(void* aDest) const {
+ memcpy(aDest, mFirstOrOnly.Elements(), mFirstOrOnly.LengthBytes());
+ if (MOZ_UNLIKELY(!mSecondOrEmpty.IsEmpty())) {
+ memcpy(static_cast<Byte*>(aDest) + mFirstOrOnly.LengthBytes(),
+ mSecondOrEmpty.Elements(), mSecondOrEmpty.LengthBytes());
+ }
+ }
+
+ // If the second span is empty, only the first span may point at data.
+ [[nodiscard]] bool IsSingleSpan() const { return mSecondOrEmpty.IsEmpty(); }
+ };
+
+ // Get Span(s) to a sequence of bytes, see `DoubleSpanOfConstBytes` for usage.
+ // Note that the reader location is *not* updated, do `+=` on it afterwards.
+ [[nodiscard]] DoubleSpanOfConstBytes PeekSpans(Length aBytes) const {
+ MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+ if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+ // All `aBytes` are in the current chunk, only one span is needed.
+ return DoubleSpanOfConstBytes{mCurrentSpan.Elements(), aBytes};
+ }
+ // Otherwise the first span covers the end of the current chunk, and the
+ // second span starts in the next chunk.
+ return DoubleSpanOfConstBytes{
+ mCurrentSpan.Elements(), mCurrentSpan.LengthBytes(),
+ mNextSpanOrEmpty.Elements(), aBytes - mCurrentSpan.LengthBytes()};
+ }
+
+ // Get Span(s) to a sequence of bytes, see `DoubleSpanOfConstBytes` for usage,
+ // and move the reader forward.
+ [[nodiscard]] DoubleSpanOfConstBytes ReadSpans(Length aBytes) {
+ DoubleSpanOfConstBytes spans = PeekSpans(aBytes);
+ (*this) += aBytes;
+ return spans;
+ }
+
+ // Read a sequence of bytes, like memcpy.
+ void ReadBytes(void* aDest, Length aBytes) {
+ DoubleSpanOfConstBytes spans = ReadSpans(aBytes);
+ MOZ_ASSERT(spans.LengthBytes() == aBytes);
+ spans.CopyBytesTo(aDest);
+ }
+
+ template <typename T>
+ void ReadIntoObject(T& aObject) {
+ Deserializer<T>::ReadInto(*this, aObject);
+ }
+
+ // Read into one or more objects, sequentially.
+ // `EntryReader::ReadIntoObjects()` with nothing is implicitly allowed, this
+ // could be useful for generic programming.
+ template <typename... Ts>
+ void ReadIntoObjects(Ts&... aTs) {
+ (ReadIntoObject(aTs), ...);
+ }
+
+ // Read data as an object and move iterator ahead.
+ template <typename T>
+ [[nodiscard]] T ReadObject() {
+ T ob = Deserializer<T>::Read(*this);
+ return ob;
+ }
+
+ private:
+ friend class ProfileBufferEntryWriter;
+
+ // Invariants:
+ // - mCurrentSpan cannot be empty unless mNextSpanOrEmpty is also empty. So
+ // mCurrentSpan always points at the next byte to read or the end.
+ // - If mNextSpanOrEmpty is empty, it points at the end of mCurrentSpan. So
+ // when reaching the end of mCurrentSpan, we can blindly move
+ // mNextSpanOrEmpty to mCurrentSpan and keep the invariants.
+ SpanOfConstBytes mCurrentSpan;
+ SpanOfConstBytes mNextSpanOrEmpty;
+ ProfileBufferBlockIndex mCurrentBlockIndex;
+ ProfileBufferBlockIndex mNextBlockIndex;
+
+ void CheckInvariants() const {
+ MOZ_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+ MOZ_ASSERT(!mNextSpanOrEmpty.IsEmpty() ||
+ (mNextSpanOrEmpty == mCurrentSpan.Last(0)));
+ }
+ };
+
+// Iterator-like class used to write into an entry.
+// An entry may be split in two memory segments (e.g., the ends of a ring
+// buffer, or two chunks of a chunked buffer); it doesn't deal with this
+// underlying buffer, but only with one or two spans pointing at the space
+// reserved for the entry.
+class ProfileBufferEntryWriter {
+ public:
+ using Byte = uint8_t;
+ using Length = uint32_t;
+
+ using SpanOfBytes = Span<Byte>;
+
+ // Class to be specialized for types to be written in an entry.
+ // See common specializations at the bottom of this header.
+ // The following static functions must be provided:
+ // static Length Bytes(const T& aT) {
+ // /* Return number of bytes that will be written. */
+ // }
+ // static void Write(ProfileBufferEntryWriter& aEW,
+ // const T& aT) {
+ // /* Call `aEW.WriteX(...)` functions to serialize aT, be sure to write
+ // exactly `Bytes(aT)` bytes! */
+ // }
+ template <typename T>
+ struct Serializer;
+
+ ProfileBufferEntryWriter() = default;
+
+ ProfileBufferEntryWriter(SpanOfBytes aSpan,
+ ProfileBufferBlockIndex aCurrentBlockIndex,
+ ProfileBufferBlockIndex aNextBlockIndex)
+ : mCurrentSpan(aSpan),
+ mCurrentBlockIndex(aCurrentBlockIndex),
+ mNextBlockIndex(aNextBlockIndex) {}
+
+ ProfileBufferEntryWriter(SpanOfBytes aSpanHead, SpanOfBytes aSpanTail,
+ ProfileBufferBlockIndex aCurrentBlockIndex,
+ ProfileBufferBlockIndex aNextBlockIndex)
+ : mCurrentSpan(aSpanHead),
+ mNextSpanOrEmpty(aSpanTail),
+ mCurrentBlockIndex(aCurrentBlockIndex),
+ mNextBlockIndex(aNextBlockIndex) {
+ // Either:
+ // - mCurrentSpan is not empty, OR
+ // - mNextSpanOrEmpty is empty if mNextSpanOrEmpty is empty as well.
+ MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+ }
+
+ // Disable copying and moving, so we can't have multiple writing heads.
+ ProfileBufferEntryWriter(const ProfileBufferEntryWriter&) = delete;
+ ProfileBufferEntryWriter& operator=(const ProfileBufferEntryWriter&) = delete;
+ ProfileBufferEntryWriter(ProfileBufferEntryWriter&&) = delete;
+ ProfileBufferEntryWriter& operator=(ProfileBufferEntryWriter&&) = delete;
+
+ void Set() {
+ mCurrentSpan = SpanOfBytes{};
+ mNextSpanOrEmpty = SpanOfBytes{};
+ mCurrentBlockIndex = nullptr;
+ mNextBlockIndex = nullptr;
+ }
+
+ void Set(SpanOfBytes aSpan, ProfileBufferBlockIndex aCurrentBlockIndex,
+ ProfileBufferBlockIndex aNextBlockIndex) {
+ mCurrentSpan = aSpan;
+ mNextSpanOrEmpty = SpanOfBytes{};
+ mCurrentBlockIndex = aCurrentBlockIndex;
+ mNextBlockIndex = aNextBlockIndex;
+ }
+
+ void Set(SpanOfBytes aSpan0, SpanOfBytes aSpan1,
+ ProfileBufferBlockIndex aCurrentBlockIndex,
+ ProfileBufferBlockIndex aNextBlockIndex) {
+ mCurrentSpan = aSpan0;
+ mNextSpanOrEmpty = aSpan1;
+ mCurrentBlockIndex = aCurrentBlockIndex;
+ mNextBlockIndex = aNextBlockIndex;
+ // Either:
+ // - mCurrentSpan is not empty, OR
+ // - mNextSpanOrEmpty is empty if mNextSpanOrEmpty is empty as well.
+ MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+ }
+
+ [[nodiscard]] Length RemainingBytes() const {
+ return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes();
+ }
+
+ [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+ return mCurrentBlockIndex;
+ }
+
+ [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+ return mNextBlockIndex;
+ }
+
+ // Be like a limited output iterator, with only `*` and prefix-`++`.
+ // These definitions are expected by std functions, to recognize this as an
+ // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+ using value_type = Byte;
+ using pointer = Byte*;
+ using reference = Byte&;
+ using iterator_category = std::output_iterator_tag;
+
+ [[nodiscard]] Byte& operator*() {
+ MOZ_RELEASE_ASSERT(RemainingBytes() >= 1);
+ return *(
+ (MOZ_LIKELY(!mCurrentSpan.IsEmpty()) ? mCurrentSpan : mNextSpanOrEmpty)
+ .Elements());
+ }
+
+ ProfileBufferEntryWriter& operator++() {
+ if (MOZ_LIKELY(mCurrentSpan.LengthBytes() >= 1)) {
+ // There is at least 1 byte in mCurrentSpan, eat it.
+ mCurrentSpan = mCurrentSpan.From(1);
+ } else {
+ // mCurrentSpan is empty, move mNextSpanOrEmpty (past the first byte) to
+ // mCurrentSpan.
+ MOZ_RELEASE_ASSERT(mNextSpanOrEmpty.LengthBytes() >= 1);
+ mCurrentSpan = mNextSpanOrEmpty.From(1);
+ mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+ }
+ return *this;
+ }
+
+ ProfileBufferEntryWriter& operator+=(Length aBytes) {
+ // Note: This is a rare operation. The code below is a copy of `WriteBytes`
+ // but without the `memcpy`s.
+ MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+ if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+ // Data fits in mCurrentSpan.
+ // Update mCurrentSpan. It may become empty, so in case of a double span,
+ // the next call will go to the false case below.
+ mCurrentSpan = mCurrentSpan.From(aBytes);
+ } else {
+ // Data does not fully fit in mCurrentSpan.
+ // This should only happen at most once: Only for double spans, and when
+ // data crosses the gap or starts there.
+ const Length tail =
+ aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+ // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+ // will go back to the true case above.
+ mCurrentSpan = mNextSpanOrEmpty.From(tail);
+ mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+ }
+ return *this;
+ }
+
+ // Number of bytes needed to represent `aValue` in unsigned LEB128.
+ template <typename T>
+ [[nodiscard]] static unsigned ULEB128Size(T aValue) {
+ return ::mozilla::ULEB128Size(aValue);
+ }
+
+ // Write number as unsigned LEB128 and move iterator ahead.
+ template <typename T>
+ void WriteULEB128(T aValue) {
+ ::mozilla::WriteULEB128(aValue, *this);
+ }
+
+ // Number of bytes needed to serialize objects.
+ template <typename... Ts>
+ [[nodiscard]] static Length SumBytes(const Ts&... aTs) {
+ return (0 + ... + Serializer<Ts>::Bytes(aTs));
+ }
+
+ // Write a sequence of bytes, like memcpy.
+ void WriteBytes(const void* aSrc, Length aBytes) {
+   // Never write more than what is still available in this entry.
+   MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+
+   if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+     // Common case: the whole payload lands in mCurrentSpan.
+     memcpy(mCurrentSpan.Elements(), aSrc, aBytes);
+     // Drop the bytes just written. mCurrentSpan may now be empty; with a
+     // double span, the next call then takes the split path below.
+     mCurrentSpan = mCurrentSpan.From(aBytes);
+     return;
+   }
+
+   // Split case: the payload does not fully fit in mCurrentSpan. This should
+   // happen at most once, only for double spans, when the data crosses the gap
+   // or starts right at it.
+   const auto headBytes = mCurrentSpan.LengthBytes();
+   const Length tailBytes = aBytes - static_cast<Length>(headBytes);
+   // First part goes to the end of mCurrentSpan. (mCurrentSpan could be empty,
+   // it's ok to do a memcpy because Span::Elements() is never null.)
+   memcpy(mCurrentSpan.Elements(), aSrc, headBytes);
+   // The remainder goes to the beginning of mNextSpanOrEmpty.
+   memcpy(mNextSpanOrEmpty.Elements(),
+          static_cast<const Byte*>(aSrc) + headBytes, tailBytes);
+   // What is left of mNextSpanOrEmpty becomes the current span, and the next
+   // span becomes empty, so later calls go back to the common case above.
+   mCurrentSpan = mNextSpanOrEmpty.From(tailBytes);
+   mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+ }
+
+ void WriteFromReader(ProfileBufferEntryReader& aReader, Length aBytes) {
+   // Both sides must be able to handle the requested amount.
+   MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+   MOZ_RELEASE_ASSERT(aBytes <= aReader.RemainingBytes());
+
+   // Work out how much comes from each of the reader's two spans: as much as
+   // possible from its current span, the remainder from its next span.
+   const Length fromCurrentSpan = std::min(
+       aBytes, static_cast<Length>(aReader.mCurrentSpan.LengthBytes()));
+   const Length fromNextSpan = aBytes - fromCurrentSpan;
+
+   if (fromCurrentSpan != 0) {
+     WriteBytes(aReader.mCurrentSpan.Elements(), fromCurrentSpan);
+   }
+   if (fromNextSpan != 0) {
+     WriteBytes(aReader.mNextSpanOrEmpty.Elements(), fromNextSpan);
+   }
+
+   // The spans were accessed directly, so advance the reader explicitly.
+   aReader += aBytes;
+ }
+
+ // Write a single object by using the appropriate Serializer.
+ template <typename T>
+ void WriteObject(const T& aObject) {
+   // The Serializer selected for `T` knows how to write it into this writer.
+   using ObjectSerializer = Serializer<T>;
+   ObjectSerializer::Write(*this, aObject);
+ }
+
+ // Write one or more objects, sequentially.
+ // Allow `EntryWriter::WriteObjects()` with nothing, this could be useful
+ // for generic programming.
+ template <typename... Ts>
+ void WriteObjects(const Ts&... aTs) {
+   // Comma fold: each argument is written in turn, left to right, through the
+   // Serializer for its type. An empty pack expands to nothing.
+   (Serializer<Ts>::Write(*this, aTs), ...);
+ }
+
+ private:
+ // The two spans covering the memory still to be written.
+ SpanOfBytes mCurrentSpan;
+ SpanOfBytes mNextSpanOrEmpty;
+ ProfileBufferBlockIndex mCurrentBlockIndex;
+ ProfileBufferBlockIndex mNextBlockIndex;
+};
+
+// ============================================================================
+// Serializer and Deserializer ready-to-use specializations.
+
+// ----------------------------------------------------------------------------
+// Trivially-copyable types (default)
+
+// The default implementation works for all trivially-copyable types (e.g.,
+// PODs).
+//
+// Usage: `aEW.WriteObject(123);`.
+//
+ // Raw pointers, though trivially-copyable, are explicitly forbidden when writing
+// (to avoid unexpected leaks/UAFs), instead use one of
+// `WrapProfileBufferLiteralCStringPointer`, `WrapProfileBufferUnownedCString`,
+// or `WrapProfileBufferRawPointer` as needed.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer {
+   static_assert(std::is_trivially_copyable<T>::value,
+                 "Serializer only works with trivially-copyable types by "
+                 "default, use/add specialization for other types.");
+
+   // The serialized form is the object's own bytes, so its size does not
+   // depend on the value.
+   static constexpr Length Bytes(const T&) { return sizeof(T); }
+
+   // Copy the object's bytes into the writer. Rejected at compile time for
+   // raw pointer types, which must go through one of the wrappers instead.
+   static void Write(ProfileBufferEntryWriter& aEW, const T& aT) {
+     static_assert(!std::is_pointer_v<T>,
+                   "Serializer won't write raw pointers by default, use "
+                   "WrapProfileBufferRawPointer or other.");
+     aEW.WriteBytes(&aT, sizeof(aT));
+   }
+ };
+
+// Usage: `aER.ReadObject<int>();` or `int x; aER.ReadIntoObject(x);`.
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer {
+   static_assert(std::is_trivially_copyable<T>::value,
+                 "Deserializer only works with trivially-copyable types by "
+                 "default, use/add specialization for other types.");
+
+   // Overwrite an existing object with sizeof(T) bytes taken from the reader.
+   static void ReadInto(ProfileBufferEntryReader& aER, T& aT) {
+     aER.ReadBytes(&aT, sizeof(aT));
+   }
+
+   // Return a new object filled from the reader. A default `T` is created
+   // first and its bytes are then overwritten; trivially-copyable types
+   // support this without UB.
+   static T Read(ProfileBufferEntryReader& aER) {
+     T result;
+     aER.ReadBytes(&result, sizeof(result));
+     return result;
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// Strip const/volatile/reference from types.
+
+// Automatically strip `const`.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer<const T>
+     : ProfileBufferEntryWriter::Serializer<T> {
+   // Nothing added: `const T` is handled exactly like `T`.
+ };
+
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer<const T>
+     : ProfileBufferEntryReader::Deserializer<T> {
+   // Nothing added: `const T` is handled exactly like `T`.
+ };
+
+// Automatically strip `volatile`.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer<volatile T>
+     : ProfileBufferEntryWriter::Serializer<T> {
+   // Nothing added: `volatile T` is handled exactly like `T`.
+ };
+
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer<volatile T>
+     : ProfileBufferEntryReader::Deserializer<T> {
+   // Nothing added: `volatile T` is handled exactly like `T`.
+ };
+
+// Automatically strip `lvalue-reference`.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer<T&>
+     : ProfileBufferEntryWriter::Serializer<T> {
+   // Nothing added: `T&` is handled exactly like `T`.
+ };
+
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer<T&>
+     : ProfileBufferEntryReader::Deserializer<T> {
+   // Nothing added: `T&` is handled exactly like `T`.
+ };
+
+// Automatically strip `rvalue-reference`.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer<T&&>
+     : ProfileBufferEntryWriter::Serializer<T> {
+   // Nothing added: `T&&` is handled exactly like `T`.
+ };
+
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer<T&&>
+     : ProfileBufferEntryReader::Deserializer<T> {
+   // Nothing added: `T&&` is handled exactly like `T`.
+ };
+
+// ----------------------------------------------------------------------------
+// ProfileBufferBlockIndex
+
+// ProfileBufferBlockIndex, serialized as the underlying value.
+ template <>
+ struct ProfileBufferEntryWriter::Serializer<ProfileBufferBlockIndex> {
+   // Fixed size: the bytes of the index object itself.
+   static constexpr Length Bytes(const ProfileBufferBlockIndex&) {
+     return sizeof(ProfileBufferBlockIndex);
+   }
+
+   // Copy the index object's bytes into the writer.
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const ProfileBufferBlockIndex& aBlockIndex) {
+     aEW.WriteBytes(&aBlockIndex, sizeof(ProfileBufferBlockIndex));
+   }
+ };
+
+ template <>
+ struct ProfileBufferEntryReader::Deserializer<ProfileBufferBlockIndex> {
+   // Overwrite an existing index with bytes taken from the reader.
+   static void ReadInto(ProfileBufferEntryReader& aER,
+                        ProfileBufferBlockIndex& aBlockIndex) {
+     aER.ReadBytes(&aBlockIndex, sizeof(ProfileBufferBlockIndex));
+   }
+
+   // Return a new index: default-constructed, then overwritten from the reader.
+   static ProfileBufferBlockIndex Read(ProfileBufferEntryReader& aER) {
+     ProfileBufferBlockIndex result;
+     aER.ReadBytes(&result, sizeof(result));
+     return result;
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// Literal C string pointer
+
+// Wrapper around a pointer to a literal C string.
+ template <size_t NonTerminalCharacters>
+ struct ProfileBufferLiteralCStringPointer {
+ const char* mCString;  // The wrapped pointer; this struct stores nothing else.
+ };
+
+// Wrap a pointer to a literal C string.
+ template <size_t CharactersIncludingTerminal>
+ ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal - 1>
+ WrapProfileBufferLiteralCStringPointer(
+     const char (&aCString)[CharactersIncludingTerminal]) {
+   // The array size is deduced from the argument; the wrapper type is
+   // parameterized on that size minus one.
+   using Wrapper =
+       ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal - 1>;
+   return Wrapper{aCString};
+ }
+
+// Literal C strings, serialized as the raw pointer because it is unique and
+// valid for the whole program lifetime.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferLiteralCStringPointer("hi"));`.
+//
+// No deserializer is provided for this type, instead it must be deserialized as
+// a raw pointer: `aER.ReadObject<const char*>();`
+ template <size_t CharactersIncludingTerminal>
+ struct ProfileBufferEntryWriter::Serializer<
+     ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>> {
+   // Size of the serialized form. Only a pointer is stored, so the size is
+   // independent from the pointer value and from the string length.
+   static constexpr Length Bytes(
+       const ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>&) {
+     return sizeof(const char*);
+   }
+
+   // Serialize the wrapped pointer into `aEW`.
+   static void Write(
+       ProfileBufferEntryWriter& aEW,
+       const ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>&
+           aWrapper) {
+     // Write the pointer *value*, not the string contents: the source must be
+     // the address of the pointer member, so that exactly the pointer's own
+     // bytes are copied (matching what `Bytes()` reports).
+     aEW.WriteBytes(&aWrapper.mCString, sizeof(aWrapper.mCString));
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// C string contents
+
+// Wrapper around a pointer to a C string whose contents will be serialized.
+ struct ProfileBufferUnownedCString {
+ const char* mCString;  // The wrapped pointer; this struct stores nothing else.
+ };
+
+// Wrap a pointer to a C string whose contents will be serialized.
+ inline ProfileBufferUnownedCString WrapProfileBufferUnownedCString(
+     const char* aCString) {
+   // Only the pointer is stored in the wrapper; nothing is copied here.
+   return ProfileBufferUnownedCString{aCString};
+ }
+
+// The contents of a (probably) unowned C string are serialized as the number of
+// characters (encoded as ULEB128) and all the characters in the string. The
+// terminal '\0' is omitted.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferUnownedCString(str.c_str()))`.
+//
+// No deserializer is provided for this pointer type, instead it must be
+// deserialized as one of the other string types that manages its contents,
+// e.g.: `aER.ReadObject<std::string>();`
+ template <>
+ struct ProfileBufferEntryWriter::Serializer<ProfileBufferUnownedCString> {
+   // Size of the ULEB128 length prefix plus one byte per character; the
+   // terminal '\0' is not counted.
+   // `strlen` is applied directly to the wrapped pointer, so that pointer must
+   // reference a null-terminated string.
+   static Length Bytes(const ProfileBufferUnownedCString& aS) {
+     const size_t charCount = strlen(aS.mCString);
+     return ULEB128Size(charCount) + charCount;
+   }
+
+   // Write the character count as ULEB128, followed by the characters
+   // themselves (without the terminal '\0').
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const ProfileBufferUnownedCString& aS) {
+     const char* const chars = aS.mCString;
+     const size_t charCount = strlen(chars);
+     aEW.WriteULEB128(charCount);
+     aEW.WriteBytes(chars, charCount);
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// Raw pointers
+
+// Wrapper around a pointer to be serialized as the raw pointer value.
+ template <typename T>
+ struct ProfileBufferRawPointer {
+ T* mRawPointer;  // The wrapped pointer; this struct stores nothing else.
+ };
+
+// Wrap a pointer to be serialized as the raw pointer value.
+ template <typename T>
+ ProfileBufferRawPointer<T> WrapProfileBufferRawPointer(T* aRawPointer) {
+   // `T` is deduced from the argument; only the pointer is stored.
+   return ProfileBufferRawPointer<T>{aRawPointer};
+ }
+
+// Raw pointers are serialized as the raw pointer value.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferRawPointer(ptr));`
+//
+// The wrapper is compulsory when writing pointers (to avoid unexpected
+// leaks/UAFs), but reading can be done straight into a raw pointer object,
+ // e.g.: `aER.ReadObject<Foo*>();`.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer<ProfileBufferRawPointer<T>> {
+   // Always the size of one pointer. The argument is ignored (and may be of
+   // any type, since this is itself a template).
+   template <typename U>
+   static constexpr Length Bytes(const U&) {
+     return sizeof(T*);
+   }
+
+   // Copy the bytes of the wrapped pointer value into the writer; the pointee
+   // is not accessed.
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const ProfileBufferRawPointer<T>& aWrapper) {
+     T* const pointerValue = aWrapper.mRawPointer;
+     aEW.WriteBytes(&pointerValue, sizeof(pointerValue));
+   }
+ };
+
+ // Usage: `aER.ReadObject<Foo*>();` or `Foo* p; aER.ReadIntoObject(p);`, no
+// wrapper necessary.
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer<ProfileBufferRawPointer<T>> {
+   // Overwrite the pointer held by an existing wrapper with bytes taken from
+   // the reader.
+   static void ReadInto(ProfileBufferEntryReader& aER,
+                        ProfileBufferRawPointer<T>& aPtr) {
+     // Read exactly the size of the pointer member, i.e., the destination of
+     // the copy and the amount written by the matching Serializer, rather than
+     // the size of the enclosing wrapper struct.
+     aER.ReadBytes(&aPtr.mRawPointer, sizeof(aPtr.mRawPointer));
+   }
+
+   // Return a new wrapper whose pointer is filled from the reader.
+   static ProfileBufferRawPointer<T> Read(ProfileBufferEntryReader& aER) {
+     ProfileBufferRawPointer<T> rawPointer;
+     ReadInto(aER, rawPointer);
+     return rawPointer;
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// std::string contents
+
+// std::string contents are serialized as the number of characters (encoded as
+// ULEB128) and all the characters in the string. The terminal '\0' is omitted.
+//
+// Usage: `std::string s = ...; aEW.WriteObject(s);`
+ template <typename CHAR>
+ struct ProfileBufferEntryWriter::Serializer<std::basic_string<CHAR>> {
+   // Size of the ULEB128 length prefix plus the character count.
+   // NOTE(review): `Write()` below copies `count * sizeof(CHAR)` bytes, which
+   // only equals the value returned here when sizeof(CHAR) is 1 -- confirm
+   // whether wider character types are meant to be supported.
+   static Length Bytes(const std::basic_string<CHAR>& aS) {
+     const auto charCount = static_cast<Length>(aS.length());
+     return ULEB128Size(charCount) + charCount;
+   }
+
+   // Write the character count as ULEB128, followed by the characters (the
+   // terminal '\0' is not included in the count).
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const std::basic_string<CHAR>& aS) {
+     const auto charCount = static_cast<Length>(aS.length());
+     aEW.WriteULEB128(charCount);
+     aEW.WriteBytes(aS.c_str(), charCount * sizeof(CHAR));
+   }
+ };
+
+ // Usage: `std::string s = aER.ReadObject<std::string>();` or
+// `std::string s; aER.ReadIntoObject(s);`
+ template <typename CHAR>
+ struct ProfileBufferEntryReader::Deserializer<std::basic_string<CHAR>> {
+   // Replace the contents of `aS` with `aLength` items taken from the reader
+   // (no length prefix is read here), then move the reader past them.
+   static void ReadCharsInto(ProfileBufferEntryReader& aER,
+                             std::basic_string<CHAR>& aS, size_t aLength) {
+     // The reader itself serves as the begin iterator, and
+     // `EmptyIteratorAtOffset()` provides the matching end iterator.
+     aS.assign(aER, aER.EmptyIteratorAtOffset(aLength));
+     // `assign` was given a copy of the reader, so advance the real one.
+     aER += aLength;
+   }
+
+   // Read a ULEB128 length, then that many characters, into `aS`.
+   static void ReadInto(ProfileBufferEntryReader& aER,
+                        std::basic_string<CHAR>& aS) {
+     using SizeType = typename std::basic_string<CHAR>::size_type;
+     const SizeType length = aER.ReadULEB128<SizeType>();
+     ReadCharsInto(aER, aS, length);
+   }
+
+   // Build a new string from `aLength` items taken from the reader (no length
+   // prefix is read here), then move the reader past them.
+   static std::basic_string<CHAR> ReadChars(ProfileBufferEntryReader& aER,
+                                            size_t aLength) {
+     // Same iterator pair as in `ReadCharsInto()`, used for construction.
+     std::basic_string<CHAR> result(aER, aER.EmptyIteratorAtOffset(aLength));
+     aER += aLength;
+     return result;
+   }
+
+   // Read a ULEB128 length, then that many characters, into a new string.
+   static std::basic_string<CHAR> Read(ProfileBufferEntryReader& aER) {
+     using SizeType = typename std::basic_string<CHAR>::size_type;
+     const SizeType length = aER.ReadULEB128<SizeType>();
+     return ReadChars(aER, length);
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// mozilla::UniqueFreePtr<CHAR>
+
+// UniqueFreePtr<CHAR>, which points at a string allocated with `malloc`
+// (typically generated by `strdup()`), is serialized as the number of
+// *bytes* (encoded as ULEB128) and all the characters in the string. The
+// null terminator is omitted.
+// `CHAR` can be any type that has a specialization for
+// `std::char_traits<CHAR>::length(const CHAR*)`.
+//
+// Note: A nullptr pointer will be serialized like an empty string, so when
+// deserializing it will result in an allocated buffer only containing a
+// single null terminator.
+ template <typename CHAR>
+ struct ProfileBufferEntryWriter::Serializer<UniqueFreePtr<CHAR>> {
+   // Size of the ULEB128 prefix plus the string's size in *bytes* (not in
+   // number of characters). A null pointer counts as an empty string.
+   static Length Bytes(const UniqueFreePtr<CHAR>& aS) {
+     if (aS) {
+       const auto byteCount =
+           std::char_traits<CHAR>::length(aS.get()) * sizeof(CHAR);
+       return ULEB128Size(byteCount) + byteCount;
+     }
+     // Null pointer: only the prefix, encoding a length of 0 bytes.
+     return ULEB128Size(0u);
+   }
+
+   // Write the size in *bytes* as ULEB128, followed by the characters (without
+   // the null terminator). A null pointer is written as a length of 0.
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const UniqueFreePtr<CHAR>& aS) {
+     if (aS) {
+       const auto byteCount =
+           std::char_traits<CHAR>::length(aS.get()) * sizeof(CHAR);
+       aEW.WriteULEB128(byteCount);
+       aEW.WriteBytes(aS.get(), byteCount);
+       return;
+     }
+     // Null pointer: just the 0 length, no characters follow.
+     aEW.WriteULEB128(0u);
+   }
+ };
+
+ template <typename CHAR>
+ struct ProfileBufferEntryReader::Deserializer<UniqueFreePtr<CHAR>> {
+   // Replace `aS` with a freshly-read string.
+   static void ReadInto(ProfileBufferEntryReader& aER, UniqueFreePtr<CHAR>& aS) {
+     aS = Read(aER);
+   }
+
+   // Read a ULEB128 size in *bytes*, then that many bytes, into a new
+   // `malloc`ed buffer that is null-terminated and handed to the returned
+   // pointer.
+   static UniqueFreePtr<CHAR> Read(ProfileBufferEntryReader& aER) {
+     // `CHAR` may be const-qualified, but the buffer must be writable here.
+     using NC_CHAR = std::remove_const_t<CHAR>;
+     const auto byteCount = aER.ReadULEB128<size_t>();
+     // Room for the serialized bytes, plus one character for the terminator.
+     // NOTE(review): the `malloc` result is used without a null check --
+     // confirm whether allocation failure needs handling here.
+     auto* const chars =
+         static_cast<NC_CHAR*>(malloc(byteCount + sizeof(NC_CHAR)));
+     aER.ReadBytes(chars, byteCount);
+     // The terminator goes right after the last character read.
+     const size_t terminatorIndex = byteCount / sizeof(NC_CHAR);
+     chars[terminatorIndex] = NC_CHAR(0);
+     return UniqueFreePtr<CHAR>(chars);
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// std::tuple
+
+// std::tuple is serialized as a sequence of each recursively-serialized item.
+//
+// This is equivalent to manually serializing each item, so reading/writing
+// tuples is equivalent to reading/writing their elements in order, e.g.:
+// ```
+// std::tuple<int, std::string> is = ...;
+// aEW.WriteObject(is); // Write the tuple, equivalent to:
+// aEW.WriteObject(/* int */ std::get<0>(is), /* string */ std::get<1>(is));
+// ...
+// // Reading back can be done directly into a tuple:
+// auto is = aER.ReadObject<std::tuple<int, std::string>>();
+// // Or each item could be read separately:
+// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>();
+// ```
+ template <typename... Ts>
+ struct ProfileBufferEntryWriter::Serializer<std::tuple<Ts...>> {
+  private:
+   // Expand the index pack to hand every tuple item to `SumBytes()`.
+   template <size_t... Is>
+   static Length TupleBytes(const std::tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+     return SumBytes(std::get<Is>(aTuple)...);
+   }
+
+   // Expand the index pack to write every tuple item, in order.
+   template <size_t... Is>
+   static void TupleWrite(ProfileBufferEntryWriter& aEW,
+                          const std::tuple<Ts...>& aTuple,
+                          std::index_sequence<Is...>) {
+     aEW.WriteObjects(std::get<Is>(aTuple)...);
+   }
+
+  public:
+   // Sum of the serialized sizes of all the items.
+   static Length Bytes(const std::tuple<Ts...>& aTuple) {
+     // One index per item: 0..N-1.
+     using Indices = std::index_sequence_for<Ts...>;
+     return TupleBytes(aTuple, Indices{});
+   }
+
+   // Write the items one after the other, nothing else is added.
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const std::tuple<Ts...>& aTuple) {
+     // One index per item: 0..N-1.
+     using Indices = std::index_sequence_for<Ts...>;
+     TupleWrite(aEW, aTuple, Indices{});
+   }
+ };
+
+ template <typename... Ts>
+ struct ProfileBufferEntryReader::Deserializer<std::tuple<Ts...>> {
+   // Read the I-th item of the tuple in place.
+   template <size_t I>
+   static void TupleIReadInto(ProfileBufferEntryReader& aER,
+                              std::tuple<Ts...>& aTuple) {
+     auto& item = std::get<I>(aTuple);
+     aER.ReadIntoObject(item);
+   }
+
+   // Comma fold over the index pack: read each item in turn, left to right.
+   template <size_t... Is>
+   static void TupleReadInto(ProfileBufferEntryReader& aER,
+                             std::tuple<Ts...>& aTuple,
+                             std::index_sequence<Is...>) {
+     (TupleIReadInto<Is>(aER, aTuple), ...);
+   }
+
+   // Overwrite every item of an existing tuple.
+   static void ReadInto(ProfileBufferEntryReader& aER,
+                        std::tuple<Ts...>& aTuple) {
+     // One index per item: 0..N-1.
+     using Indices = std::index_sequence_for<Ts...>;
+     TupleReadInto(aER, aTuple, Indices{});
+   }
+
+   // Return a new tuple. Its items are default-constructed first, and then
+   // overwritten one by one.
+   static std::tuple<Ts...> Read(ProfileBufferEntryReader& aER) {
+     std::tuple<Ts...> result;
+     ReadInto(aER, result);
+     return result;
+   }
+ };
+// ----------------------------------------------------------------------------
+// mozilla::Span
+
+// Span. All elements are serialized in sequence.
+// The caller is assumed to know the number of elements (they may manually
+// write&read it before the span if needed).
+// Similar to tuples, reading/writing spans is equivalent to reading/writing
+// their elements in order.
+ template <class T, size_t N>
+ struct ProfileBufferEntryWriter::Serializer<Span<T, N>> {
+   // Sum of the serialized sizes of all elements; no count is included.
+   static Length Bytes(const Span<T, N>& aSpan) {
+     Length total = 0;
+     for (const T& item : aSpan) {
+       total += SumBytes(item);
+     }
+     return total;
+   }
+
+   // Write the elements one after the other; no count is written.
+   static void Write(ProfileBufferEntryWriter& aEW, const Span<T, N>& aSpan) {
+     for (const T& item : aSpan) {
+       aEW.WriteObject(item);
+     }
+   }
+ };
+
+ template <class T, size_t N>
+ struct ProfileBufferEntryReader::Deserializer<Span<T, N>> {
+   // Fill the storage that `aSpan` points at: one object is read for each
+   // element of the span, so the span's length decides how much is read.
+   static void ReadInto(ProfileBufferEntryReader& aER, Span<T, N>& aSpan) {
+     for (T& slot : aSpan) {
+       aER.ReadIntoObject(slot);
+     }
+   }
+
+   // Deleted on purpose: a Span does not own its data, so a returned Span
+   // would probably leak.
+   static Span<T, N> Read(ProfileBufferEntryReader& aER) = delete;
+ };
+
+// ----------------------------------------------------------------------------
+// mozilla::Maybe
+
+// Maybe<T> is serialized as one byte containing either 'm' (Nothing),
+// or 'M' followed by the recursively-serialized `T` object.
+ template <typename T>
+ struct ProfileBufferEntryWriter::Serializer<Maybe<T>> {
+   // One flag byte, plus the size of the contained object when there is one.
+   static Length Bytes(const Maybe<T>& aMaybe) {
+     if (aMaybe.isNothing()) {
+       return 1;
+     }
+     return 1 + SumBytes(aMaybe.ref());
+   }
+
+   // Write the flag byte ('m' for Nothing, 'M' for something; the values are
+   // arbitrary, they only need to differ), then the contained object if any.
+   static void Write(ProfileBufferEntryWriter& aEW, const Maybe<T>& aMaybe) {
+     if (!aMaybe.isNothing()) {
+       aEW.WriteObject<char>('M');
+       // The contained object goes through the Serializer for its own type.
+       aEW.WriteObject(aMaybe.ref());
+       return;
+     }
+     aEW.WriteObject<char>('m');
+   }
+ };
+
+ template <typename T>
+ struct ProfileBufferEntryReader::Deserializer<Maybe<T>> {
+   // Update an existing Maybe from the reader: emptied on 'm', otherwise its
+   // contained object is (created if needed and) overwritten.
+   static void ReadInto(ProfileBufferEntryReader& aER, Maybe<T>& aMaybe) {
+     const char c = aER.ReadObject<char>();
+     if (c == 'm') {
+       aMaybe.reset();
+       return;
+     }
+     // Any other flag value is treated as 'M'; only checked by this assert.
+     MOZ_ASSERT(c == 'M');
+     if (aMaybe.isNothing()) {
+       // Nothing to overwrite yet, so start from a default `T`.
+       aMaybe.emplace();
+     }
+     // The contained object goes through the Deserializer for its own type.
+     aER.ReadIntoObject(aMaybe.ref());
+   }
+
+   // Return a new Maybe: it only contains an object when the flag byte is 'M'.
+   static Maybe<T> Read(ProfileBufferEntryReader& aER) {
+     Maybe<T> result;
+     const char c = aER.ReadObject<char>();
+     MOZ_ASSERT(c == 'M' || c == 'm');
+     if (c != 'M') {
+       return result;
+     }
+     // Start from a default `T` inside the Maybe, to be overwritten through
+     // the Deserializer for the contained type.
+     result = Some(T{});
+     aER.ReadIntoObject(result.ref());
+     return result;
+   }
+ };
+
+// ----------------------------------------------------------------------------
+// mozilla::Variant
+
+// Variant is serialized as the tag (0-based index of the stored type, encoded
+// as ULEB128), and the recursively-serialized object.
+ template <typename... Ts>
+ struct ProfileBufferEntryWriter::Serializer<Variant<Ts...>> {
+  public:
+   // Size of the ULEB128-encoded tag plus the size of the stored object.
+   static Length Bytes(const Variant<Ts...>& aVariantTs) {
+     // `match` calls back with the tag and the currently-stored alternative.
+     const auto taggedSize = [](auto aTag, const auto& aStored) {
+       return ULEB128Size(aTag) + SumBytes(aStored);
+     };
+     return aVariantTs.match(taggedSize);
+   }
+
+   // Write the tag as ULEB128, followed by the stored object.
+   static void Write(ProfileBufferEntryWriter& aEW,
+                     const Variant<Ts...>& aVariantTs) {
+     // `match` calls back with the tag and the currently-stored alternative.
+     const auto writeTagged = [&aEW](auto aTag, const auto& aStored) {
+       aEW.WriteULEB128(aTag);
+       aEW.WriteObject(aStored);
+     };
+     aVariantTs.match(writeTagged);
+   }
+ };
+
+ template <typename... Ts>
+ struct ProfileBufferEntryReader::Deserializer<Variant<Ts...>> {
+  private:
+   // Invoked once per alternative index by the fold in `VariantReadInto()`;
+   // does nothing unless `I` is the tag that was read.
+   template <size_t I>
+   static void VariantIReadInto(ProfileBufferEntryReader& aER,
+                                Variant<Ts...>& aVariantTs, unsigned aTag) {
+     if (I != aTag) {
+       return;
+     }
+     // Make sure the variant holds the I-th alternative before reading into
+     // it. Note that this may create a default object.
+     if (!aVariantTs.template is<I>()) {
+       aVariantTs = Variant<Ts...>(VariantIndex<I>{});
+     }
+     aER.ReadIntoObject(aVariantTs.template as<I>());
+   }
+
+   // Read the tag, then offer it to every alternative index in turn. If the
+   // tag matches none of them, nothing more is read.
+   template <size_t... Is>
+   static void VariantReadInto(ProfileBufferEntryReader& aER,
+                               Variant<Ts...>& aVariantTs,
+                               std::index_sequence<Is...>) {
+     const unsigned tag = aER.ReadULEB128<unsigned>();
+     (VariantIReadInto<Is>(aER, aVariantTs, tag), ...);
+   }
+
+  public:
+   // Update an existing variant from the reader.
+   static void ReadInto(ProfileBufferEntryReader& aER,
+                        Variant<Ts...>& aVariantTs) {
+     // One index per alternative: 0..N-1.
+     using Indices = std::index_sequence_for<Ts...>;
+     VariantReadInto(aER, aVariantTs, Indices{});
+   }
+
+   // Return a new variant. It starts as a default object of the first type,
+   // and is then overwritten. Consider using `ReadInto` for more control if
+   // needed.
+   static Variant<Ts...> Read(ProfileBufferEntryReader& aER) {
+     Variant<Ts...> result(VariantIndex<0>{});
+     ReadInto(aER, result);
+     return result;
+   }
+ };
+
+} // namespace mozilla
+
+#endif // ProfileBufferEntrySerialization_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferIndex.h b/mozglue/baseprofiler/public/ProfileBufferIndex.h
new file mode 100644
index 0000000000..5cda6bd89e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferIndex.h
@@ -0,0 +1,97 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferIndex_h
+#define ProfileBufferIndex_h
+
+#include "mozilla/Attributes.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mozilla {
+
+// Generic index into a Profiler buffer, mostly for internal usage.
+// Intended to appear infinite (it should effectively never wrap).
+// 0 (zero) is reserved as nullptr-like value; it may indicate failure result,
+// or it may point at the earliest available block.
+using ProfileBufferIndex = uint64_t;
+
+// Externally-opaque class encapsulating a block index, i.e. a
+// ProfileBufferIndex that is guaranteed to point at the start of a Profile
+// buffer block (until it is destroyed, but then that index cannot be reused and
+// functions should gracefully handle expired blocks).
+// Users may get these from Profile buffer functions, to later access previous
+// blocks; they should avoid converting and operating on their value.
+ class ProfileBufferBlockIndex {
+  public:
+   // Default index, holding the internal value 0. Profile buffers must
+   // guarantee that it is before any valid entries; all public APIs should
+   // fail gracefully, doing and/or returning Nothing.
+   ProfileBufferBlockIndex() : mBlockIndex(0) {}
+
+   // Implicit conversion from literal `nullptr`, equivalent to the default
+   // index; allows convenient init/reset/comparison with the 0 index.
+   MOZ_IMPLICIT ProfileBufferBlockIndex(std::nullptr_t)
+       : ProfileBufferBlockIndex() {}
+
+   // Explicit conversion to bool, usable in `if` and other tests.
+   // False only for the default `ProfileBufferBlockIndex{}` value.
+   explicit operator bool() const { return static_cast<bool>(mBlockIndex); }
+
+   // Comparison operators, ordering by the internal value, so the default
+   // `ProfileBufferBlockIndex{}` value is always the lowest.
+   // `==` and `<` compare the values directly; the others derive from them.
+   [[nodiscard]] bool operator==(const ProfileBufferBlockIndex& aRhs) const {
+     return mBlockIndex == aRhs.mBlockIndex;
+   }
+   [[nodiscard]] bool operator!=(const ProfileBufferBlockIndex& aRhs) const {
+     return !(*this == aRhs);
+   }
+   [[nodiscard]] bool operator<(const ProfileBufferBlockIndex& aRhs) const {
+     return mBlockIndex < aRhs.mBlockIndex;
+   }
+   [[nodiscard]] bool operator<=(const ProfileBufferBlockIndex& aRhs) const {
+     return !(aRhs < *this);
+   }
+   [[nodiscard]] bool operator>(const ProfileBufferBlockIndex& aRhs) const {
+     return aRhs < *this;
+   }
+   [[nodiscard]] bool operator>=(const ProfileBufferBlockIndex& aRhs) const {
+     return !(*this < aRhs);
+   }
+
+   // Explicit conversion to ProfileBufferIndex, mostly used by internal
+   // Profile buffer code.
+   [[nodiscard]] ProfileBufferIndex ConvertToProfileBufferIndex() const {
+     return mBlockIndex;
+   }
+
+   // Explicit creation from ProfileBufferIndex, mostly used by internal
+   // Profile buffer code.
+   [[nodiscard]] static ProfileBufferBlockIndex CreateFromProfileBufferIndex(
+       ProfileBufferIndex aIndex) {
+     return ProfileBufferBlockIndex{aIndex};
+   }
+
+  private:
+   // Construction from a number is private, go through
+   // `CreateFromProfileBufferIndex()` instead.
+   // This indirection is deliberate: only profiler code should need to create
+   // these objects, so creating them is made harder. Ideally this class would
+   // be used wherever a block index is stored, but so much code uses
+   // `uint64_t` that changing it all would be a big task. For now, conversions
+   // to/from numbers are allowed, but kept as ugly as possible so that they
+   // don't become too common; and should a global change be tackled one day,
+   // the explicit conversion functions make all these locations easy to find.
+   explicit ProfileBufferBlockIndex(ProfileBufferIndex aBlockIndex)
+       : mBlockIndex(aBlockIndex) {}
+
+   // The wrapped index value; 0 for the default/nullptr-like index.
+   ProfileBufferIndex mBlockIndex;
+ };
+
+} // namespace mozilla
+
+#endif // ProfileBufferIndex_h
diff --git a/mozglue/baseprofiler/public/ProfileChunkedBuffer.h b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h
new file mode 100644
index 0000000000..ded7fbd08b
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h
@@ -0,0 +1,1560 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileChunkedBuffer_h
+#define ProfileChunkedBuffer_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/ProfileBufferChunkManager.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/ProfileChunkedBufferDetail.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Unused.h"
+
+#include <utility>
+
+#ifdef DEBUG
+# include <cstdio>
+#endif
+
+namespace mozilla {
+
+// Thread-safe buffer that can store blocks of different sizes during defined
+// sessions, using Chunks (from a ChunkManager) as storage.
+//
+// Each *block* contains an *entry* and the entry size:
+// [ entry_size | entry ] [ entry_size | entry ] ...
+//
+// *In-session* is a period of time during which `ProfileChunkedBuffer` allows
+// reading and writing.
+// *Out-of-session*, the `ProfileChunkedBuffer` object is still valid, but
+// contains no data, and gracefully denies accesses.
+//
+// To write an entry, the buffer reserves a block of sufficient size (to contain
+// user data of predetermined size), writes the entry size, and lets the caller
+// fill the entry contents using a ProfileBufferEntryWriter. E.g.:
+// ```
+// ProfileChunkedBuffer cb(...);
+// cb.ReserveAndPut([]() { return sizeof(123); },
+// [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+// if (aEW) { aEW->WriteObject(123); }
+// });
+// ```
+// Other `Put...` functions may be used as shortcuts for simple entries.
+// The objects given to the caller's callbacks should only be used inside the
+// callbacks and not stored elsewhere, because they keep their own references to
+// chunk memory and therefore should not live longer.
+ // Different types of objects may be serialized into an entry, see
+// `ProfileBufferEntryWriter::Serializer` for more information.
+//
+// When reading data, the buffer iterates over blocks (it knows how to read the
+// entry size, and therefore move to the next block), and lets the caller read
+// the entry inside of each block. E.g.:
+// ```
+// cb.ReadEach([](ProfileBufferEntryReader& aER) {
+// /* Use ProfileBufferEntryReader functions to read serialized objects. */
+// int n = aER.ReadObject<int>();
+// });
+// ```
+ // Different types of objects may be deserialized from an entry, see
+// `ProfileBufferEntryReader::Deserializer` for more information.
+//
+// Writers may retrieve the block index corresponding to an entry
+// (`ProfileBufferBlockIndex` is an opaque type preventing the user from easily
+// modifying it). That index may later be used with `ReadAt` to get back to the
+// entry in that particular block -- if it still exists.
+class ProfileChunkedBuffer {
+ public:
+ using Byte = ProfileBufferChunk::Byte;
+ using Length = ProfileBufferChunk::Length;
+
+ enum class ThreadSafety { WithoutMutex, WithMutex };
+
+ // Constructor without chunk manager starts out-of-session (nothing to read or write).
+ explicit ProfileChunkedBuffer(ThreadSafety aThreadSafety)
+ : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {}
+
+ // Start in-session with external chunk manager.
+ ProfileChunkedBuffer(ThreadSafety aThreadSafety,
+ ProfileBufferChunkManager& aChunkManager)
+ : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {
+ SetChunkManager(aChunkManager);
+ }
+
+ // Start in-session with owned chunk manager.
+ ProfileChunkedBuffer(ThreadSafety aThreadSafety,
+ UniquePtr<ProfileBufferChunkManager>&& aChunkManager)
+ : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {
+ SetChunkManager(std::move(aChunkManager));
+ }
+
+ ~ProfileChunkedBuffer() {
+ // Do proper clean-up by resetting the chunk manager.
+ ResetChunkManager();
+ }
+
+ // This cannot change during the lifetime of this buffer, so there's no need
+ // to lock.
+ [[nodiscard]] bool IsThreadSafe() const { return mMutex.IsActivated(); }
+
+ [[nodiscard]] bool IsInSession() const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return !!mChunkManager;
+ }
+
+ // Stop using the current chunk manager.
+ // If we own the current chunk manager, it will be destroyed.
+ // This will always clear currently-held chunks, if any.
+ void ResetChunkManager() {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ Unused << ResetChunkManager(lock);
+ }
+
+ // Set the current chunk manager.
+ // The caller is responsible for keeping the chunk manager alive as long as
+ // it's used here (until the next (Re)SetChunkManager, or
+ // ~ProfileChunkedBuffer).
+ void SetChunkManager(ProfileBufferChunkManager& aChunkManager) {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ Unused << ResetChunkManager(lock);
+ SetChunkManager(aChunkManager, lock);
+ }
+
+ // Set the current chunk manager, and keep ownership of it.
+ void SetChunkManager(UniquePtr<ProfileBufferChunkManager>&& aChunkManager) {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ Unused << ResetChunkManager(lock);
+ mOwnedChunkManager = std::move(aChunkManager);
+ if (mOwnedChunkManager) {
+ SetChunkManager(*mOwnedChunkManager, lock);
+ }
+ }
+
+ // Set the current chunk manager, except if it's already the one provided.
+ // The caller is responsible for keeping the chunk manager alive as long as
+ // it's used here (until the next (Re)SetChunkManager, or
+ // ~ProfileChunkedBuffer).
+ void SetChunkManagerIfDifferent(ProfileBufferChunkManager& aChunkManager) {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (!mChunkManager || mChunkManager != &aChunkManager) {
+ Unused << ResetChunkManager(lock);
+ SetChunkManager(aChunkManager, lock);
+ }
+ }
+
+ // Clear the contents of this buffer, ready to receive new chunks.
+ // Note that memory is not freed: No chunks are destroyed, they are all
+ // recycled.
+ // Also the range doesn't reset, instead it continues at some point after the
+ // previous range. This may be useful if the caller may be keeping indexes
+ // into old chunks that have now been cleared, using these indexes will fail
+ // gracefully (instead of potentially pointing into new data).
+ void Clear() {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session.
+ return;
+ }
+
+ mRangeStart = mRangeEnd = mNextChunkRangeStart;
+ mPushedBlockCount = 0;
+ mClearedBlockCount = 0;
+ mFailedPutBytes = 0;
+
+ // Recycle all released chunks as "next" chunks. This will reduce the number
+ // of future allocations. Also, when using ProfileBufferChunkManagerSingle,
+ // this retrieves the one chunk if it was released.
+ UniquePtr<ProfileBufferChunk> releasedChunks =
+ mChunkManager->GetExtantReleasedChunks();
+ if (releasedChunks) {
+ // Released chunks should be in the "Done" state, they need to be marked
+ // "recycled" before they can be reused.
+ for (ProfileBufferChunk* chunk = releasedChunks.get(); chunk;
+ chunk = chunk->GetNext()) {
+ chunk->MarkRecycled();
+ }
+ mNextChunks = ProfileBufferChunk::Join(std::move(mNextChunks),
+ std::move(releasedChunks));
+ }
+
+ if (mCurrentChunk) {
+ // We already have a current chunk (empty or in-use), mark it "done" and
+ // then "recycled", ready to be reused.
+ mCurrentChunk->MarkDone();
+ mCurrentChunk->MarkRecycled();
+ } else {
+ if (!mNextChunks) {
+ // No current chunk, and no next chunks to recycle, nothing more to do.
+ // The next "Put" operation will try to allocate a chunk as needed.
+ return;
+ }
+
+ // No current chunk, take a next chunk.
+ mCurrentChunk = std::exchange(mNextChunks, mNextChunks->ReleaseNext());
+ }
+
+ // Here, there was already a current chunk, or one has just been taken.
+ // Make sure it's ready to receive new entries.
+ InitializeCurrentChunk(lock);
+ }
+
+ // Buffer maximum length in bytes.
+ Maybe<size_t> BufferLength() const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (!mChunkManager) {
+ return Nothing{};
+ }
+ return Some(mChunkManager->MaxTotalSize());
+ }
+
+ [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return SizeOfExcludingThis(aMallocSizeOf, lock);
+ }
+
+ [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf, lock);
+ }
+
+ // Snapshot of the buffer state.
+ struct State {
+ // Index to/before the first block.
+ ProfileBufferIndex mRangeStart = 1;
+
+ // Index past the last block. Equals mRangeStart if empty.
+ ProfileBufferIndex mRangeEnd = 1;
+
+ // Number of blocks that have been pushed into this buffer.
+ uint64_t mPushedBlockCount = 0;
+
+ // Number of blocks that have been removed from this buffer.
+ // Note: Live entries = pushed - cleared.
+ uint64_t mClearedBlockCount = 0;
+
+ // Number of bytes that could not be put into this buffer.
+ uint64_t mFailedPutBytes = 0;
+ };
+
+ // Get a snapshot of the current state.
+ // When out-of-session, mRangeStart==mRangeEnd, and
+ // mPushedBlockCount==mClearedBlockCount==0.
+ // Note that these may change right after this thread-safe call, so they
+ // should only be used for statistical purposes.
+ [[nodiscard]] State GetState() const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return {mRangeStart, mRangeEnd, mPushedBlockCount, mClearedBlockCount,
+ mFailedPutBytes};
+ }
+
+ // If in-session, return the start TimeStamp of the earliest chunk.
+ // If out-of-session, return a null TimeStamp.
+ [[nodiscard]] TimeStamp GetEarliestChunkStartTimeStamp() const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session.
+ return {};
+ }
+ return mChunkManager->PeekExtantReleasedChunks(
+ [&](const ProfileBufferChunk* aOldestChunk) -> TimeStamp {
+ if (aOldestChunk) {
+ return aOldestChunk->ChunkHeader().mStartTimeStamp;
+ }
+ if (mCurrentChunk) {
+ return mCurrentChunk->ChunkHeader().mStartTimeStamp;
+ }
+ return {};
+ });
+ }
+
+ [[nodiscard]] bool IsEmpty() const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return mRangeStart == mRangeEnd;
+ }
+
+ // True if this buffer is already locked on this thread.
+ // This should be used if some functions may call an already-locked buffer,
+ // e.g.: Put -> memory hook -> profiler_add_native_allocation_marker -> Put.
+ [[nodiscard]] bool IsThreadSafeAndLockedOnCurrentThread() const {
+ return mMutex.IsActivatedAndLockedOnCurrentThread();
+ }
+
+ // Lock the buffer mutex and run the provided callback.
+ // This can be useful when the caller needs to explicitly lock down this
+ // buffer, but not do anything else with it.
+ template <typename Callback>
+ auto LockAndRun(Callback&& aCallback) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return std::forward<Callback>(aCallback)();
+ }
+
+ // Reserve a block that can hold an entry of the given `aCallbackEntryBytes()`
+ // size, write the entry size (ULEB128-encoded), and invoke and return
+ // `aCallback(Maybe<ProfileBufferEntryWriter>&)`.
+ // Note: `aCallbackEntryBytes` is a callback instead of a simple value, to
+ // delay this potentially-expensive computation until after we've checked that
+ // we're in-session; use `Put(Length, Callback)` below if you know the size
+ // already.
+ template <typename CallbackEntryBytes, typename Callback>
+ auto ReserveAndPut(CallbackEntryBytes&& aCallbackEntryBytes,
+ Callback&& aCallback)
+ -> decltype(std::forward<Callback>(aCallback)(
+ std::declval<Maybe<ProfileBufferEntryWriter>&>())) {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+ // This can only be read in the 2nd lambda below after it has been written
+ // by the first lambda.
+ Length entryBytes;
+
+ return ReserveAndPutRaw(
+ [&]() {
+ entryBytes = std::forward<CallbackEntryBytes>(aCallbackEntryBytes)();
+ MOZ_ASSERT(entryBytes != 0, "Empty entries are not allowed");
+ return ULEB128Size(entryBytes) + entryBytes;
+ },
+ [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+ if (aMaybeEntryWriter.isSome()) {
+ aMaybeEntryWriter->WriteULEB128(entryBytes);
+ MOZ_ASSERT(aMaybeEntryWriter->RemainingBytes() == entryBytes);
+ }
+ return std::forward<Callback>(aCallback)(aMaybeEntryWriter);
+ },
+ lock);
+ }
+
+ template <typename Callback>
+ auto Put(Length aEntryBytes, Callback&& aCallback) {
+ return ReserveAndPut([aEntryBytes]() { return aEntryBytes; },
+ std::forward<Callback>(aCallback));
+ }
+
+ // Add a new entry copied from the given buffer, return block index.
+ ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) {
+ return ReserveAndPut(
+ [aBytes]() { return aBytes; },
+ [aSrc, aBytes](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+ if (aMaybeEntryWriter.isNothing()) {
+ return ProfileBufferBlockIndex{};
+ }
+ aMaybeEntryWriter->WriteBytes(aSrc, aBytes);
+ return aMaybeEntryWriter->CurrentBlockIndex();
+ });
+ }
+
+ // Add a new single entry with *all* given objects (using a Serializer for
+ // each), return block index.
+ template <typename... Ts>
+ ProfileBufferBlockIndex PutObjects(const Ts&... aTs) {
+ static_assert(sizeof...(Ts) > 0,
+ "PutObjects must be given at least one object.");
+ return ReserveAndPut(
+ [&]() { return ProfileBufferEntryWriter::SumBytes(aTs...); },
+ [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+ if (aMaybeEntryWriter.isNothing()) {
+ return ProfileBufferBlockIndex{};
+ }
+ aMaybeEntryWriter->WriteObjects(aTs...);
+ return aMaybeEntryWriter->CurrentBlockIndex();
+ });
+ }
+
+ // Add a new entry copied from the given object, return block index.
+ template <typename T>
+ ProfileBufferBlockIndex PutObject(const T& aOb) {
+ return PutObjects(aOb);
+ }
+
+ // Get *all* chunks related to this buffer, including extant chunks in its
+ // ChunkManager, and yet-unused new/recycled chunks.
+ // We don't expect this buffer to be used again, though it's still possible
+ // and will allocate the first buffer when needed.
+ [[nodiscard]] UniquePtr<ProfileBufferChunk> GetAllChunks() {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session.
+ return nullptr;
+ }
+ UniquePtr<ProfileBufferChunk> chunks =
+ mChunkManager->GetExtantReleasedChunks();
+ Unused << HandleRequestedChunk_IsPending(lock);
+ if (MOZ_LIKELY(!!mCurrentChunk)) {
+ mCurrentChunk->MarkDone();
+ chunks =
+ ProfileBufferChunk::Join(std::move(chunks), std::move(mCurrentChunk));
+ }
+ chunks =
+ ProfileBufferChunk::Join(std::move(chunks), std::move(mNextChunks));
+ mChunkManager->ForgetUnreleasedChunks();
+ mRangeStart = mRangeEnd = mNextChunkRangeStart;
+ return chunks;
+ }
+
+ // True if the given index points inside the current chunk (up to the last
+ // written byte).
+ // This could be used to check if an index written now would have a good
+ // chance of referring to a previous block that has not been destroyed yet.
+ // But use with extreme care: This information may become incorrect right
+ // after this function returns, because new writes could start a new chunk.
+ [[nodiscard]] bool IsIndexInCurrentChunk(ProfileBufferIndex aIndex) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager || !mCurrentChunk)) {
+ // Out-of-session, or no current chunk.
+ return false;
+ }
+ return (mCurrentChunk->RangeStart() <= aIndex) &&
+ (aIndex < (mCurrentChunk->RangeStart() +
+ mCurrentChunk->OffsetPastLastBlock()));
+ }
+
+ class Reader;
+
+ // Class that can iterate through blocks and provide
+ // `ProfileBufferEntryReader`s.
+ // Created through `Reader`, lives within a lock guard lifetime.
+ class BlockIterator {
+ public:
+#ifdef DEBUG
+ ~BlockIterator() {
+ // No BlockIterator should live outside of a mutexed call.
+ mBuffer->mMutex.AssertCurrentThreadOwns();
+ }
+#endif // DEBUG
+
+ // Comparison with other iterator, mostly used in range-for loops.
+ [[nodiscard]] bool operator==(const BlockIterator& aRhs) const {
+ MOZ_ASSERT(mBuffer == aRhs.mBuffer);
+ return mCurrentBlockIndex == aRhs.mCurrentBlockIndex;
+ }
+ [[nodiscard]] bool operator!=(const BlockIterator& aRhs) const {
+ MOZ_ASSERT(mBuffer == aRhs.mBuffer);
+ return mCurrentBlockIndex != aRhs.mCurrentBlockIndex;
+ }
+
+ // Advance to next BlockIterator.
+ BlockIterator& operator++() {
+ mBuffer->mMutex.AssertCurrentThreadOwns();
+ mCurrentBlockIndex =
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mNextBlockPointer.GlobalRangePosition());
+ mCurrentEntry =
+ mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize());
+ return *this;
+ }
+
+ // Dereferencing creates a `ProfileBufferEntryReader` object for the entry
+ // inside this block.
+ // (Note: It would be possible to return a `const
+ // ProfileBufferEntryReader&`, but not useful in practice, because in most
+ // case the user will want to read, which is non-const.)
+ [[nodiscard]] ProfileBufferEntryReader operator*() const {
+ return mCurrentEntry;
+ }
+
+ // True if this iterator is just past the last entry.
+ [[nodiscard]] bool IsAtEnd() const {
+ return mCurrentEntry.RemainingBytes() == 0;
+ }
+
+ // Can be used as reference to come back to this entry with `GetEntryAt()`.
+ [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+ return mCurrentBlockIndex;
+ }
+
+ // Index past the end of this block, which is the start of the next block.
+ [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+ MOZ_ASSERT(!IsAtEnd());
+ return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mNextBlockPointer.GlobalRangePosition());
+ }
+
+ // Index of the first block in the whole buffer.
+ [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const {
+ mBuffer->mMutex.AssertCurrentThreadOwns();
+ return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mBuffer->mRangeStart);
+ }
+
+ // Index past the last block in the whole buffer.
+ [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const {
+ mBuffer->mMutex.AssertCurrentThreadOwns();
+ return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mBuffer->mRangeEnd);
+ }
+
+ private:
+ // Only a Reader can instantiate a BlockIterator.
+ friend class Reader;
+
+ BlockIterator(const ProfileChunkedBuffer& aBuffer,
+ const ProfileBufferChunk* aChunks0,
+ const ProfileBufferChunk* aChunks1,
+ ProfileBufferBlockIndex aBlockIndex)
+ : mNextBlockPointer(aChunks0, aChunks1, aBlockIndex),
+ mCurrentBlockIndex(
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mNextBlockPointer.GlobalRangePosition())),
+ mCurrentEntry(
+ mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize())),
+ mBuffer(WrapNotNull(&aBuffer)) {
+ // No BlockIterator should live outside of a mutexed call.
+ mBuffer->mMutex.AssertCurrentThreadOwns();
+ }
+
+ profiler::detail::InChunkPointer mNextBlockPointer;
+
+ ProfileBufferBlockIndex mCurrentBlockIndex;
+
+ ProfileBufferEntryReader mCurrentEntry;
+
+ // Using a non-null pointer instead of a reference, to allow copying.
+ // This BlockIterator should only live inside one of the thread-safe
+ // ProfileChunkedBuffer functions, for this reference to stay valid.
+ NotNull<const ProfileChunkedBuffer*> mBuffer;
+ };
+
+ // Class that can create `BlockIterator`s (e.g., for range-for), or just
+ // iterate through entries; lives within a lock guard lifetime.
+ class MOZ_RAII Reader {
+ public:
+ Reader(const Reader&) = delete;
+ Reader& operator=(const Reader&) = delete;
+ Reader(Reader&&) = delete;
+ Reader& operator=(Reader&&) = delete;
+
+#ifdef DEBUG
+ ~Reader() {
+ // No Reader should live outside of a mutexed call.
+ mBuffer.mMutex.AssertCurrentThreadOwns();
+ }
+#endif // DEBUG
+
+ // Index of the first block in the whole buffer.
+ [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const {
+ mBuffer.mMutex.AssertCurrentThreadOwns();
+ return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mBuffer.mRangeStart);
+ }
+
+ // Index past the last block in the whole buffer.
+ [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const {
+ mBuffer.mMutex.AssertCurrentThreadOwns();
+ return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ mBuffer.mRangeEnd);
+ }
+
+ // Iterators to the first and past-the-last blocks.
+ // Compatible with range-for (see `ForEach` below as example).
+ [[nodiscard]] BlockIterator begin() const {
+ return BlockIterator(mBuffer, mChunks0, mChunks1, nullptr);
+ }
+ // Note that a `BlockIterator` at the `end()` should not be dereferenced, as
+ // there is no actual block there!
+ [[nodiscard]] BlockIterator end() const {
+ return BlockIterator(mBuffer, nullptr, nullptr, nullptr);
+ }
+
+ // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to
+ // the stored range. Note that a `BlockIterator` at the `end()` should not
+ // be dereferenced, as there is no actual block there!
+ [[nodiscard]] BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const {
+ if (aBlockIndex < BufferRangeStart()) {
+ // Anything before the range (including null ProfileBufferBlockIndex) is
+ // clamped at the beginning.
+ return begin();
+ }
+ // Otherwise we at least expect the index to be valid (pointing exactly at
+ // a live block, or just past the end.)
+ return BlockIterator(mBuffer, mChunks0, mChunks1, aBlockIndex);
+ }
+
+ // Run `aCallback(ProfileBufferEntryReader&)` on each entry from first to
+ // last. Callback should not store `ProfileBufferEntryReader`, as it may
+ // become invalid after this thread-safe call.
+ template <typename Callback>
+ void ForEach(Callback&& aCallback) const {
+ for (ProfileBufferEntryReader reader : *this) {
+ aCallback(reader);
+ }
+ }
+
+ // If this reader only points at one chunk with some data, this data will be
+ // exposed as a single entry.
+ [[nodiscard]] ProfileBufferEntryReader SingleChunkDataAsEntry() {
+ const ProfileBufferChunk* onlyNonEmptyChunk = nullptr;
+ for (const ProfileBufferChunk* chunkList : {mChunks0, mChunks1}) {
+ for (const ProfileBufferChunk* chunk = chunkList; chunk;
+ chunk = chunk->GetNext()) {
+ if (chunk->OffsetFirstBlock() != chunk->OffsetPastLastBlock()) {
+ if (onlyNonEmptyChunk) {
+ // More than one non-empty chunk.
+ return ProfileBufferEntryReader();
+ }
+ onlyNonEmptyChunk = chunk;
+ }
+ }
+ }
+ if (!onlyNonEmptyChunk) {
+ // No non-empty chunks.
+ return ProfileBufferEntryReader();
+ }
+ // Here, we have found one chunk that had some data.
+ return ProfileBufferEntryReader(
+ onlyNonEmptyChunk->BufferSpan().FromTo(
+ onlyNonEmptyChunk->OffsetFirstBlock(),
+ onlyNonEmptyChunk->OffsetPastLastBlock()),
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ onlyNonEmptyChunk->RangeStart()),
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ onlyNonEmptyChunk->RangeStart() +
+ (onlyNonEmptyChunk->OffsetPastLastBlock() -
+ onlyNonEmptyChunk->OffsetFirstBlock())));
+ }
+
+ private:
+ friend class ProfileChunkedBuffer;
+
+ explicit Reader(const ProfileChunkedBuffer& aBuffer,
+ const ProfileBufferChunk* aChunks0,
+ const ProfileBufferChunk* aChunks1)
+ : mBuffer(aBuffer), mChunks0(aChunks0), mChunks1(aChunks1) {
+ // No Reader should live outside of a mutexed call.
+ mBuffer.mMutex.AssertCurrentThreadOwns();
+ }
+
+ // This Reader should only live inside one of the thread-safe
+ // ProfileChunkedBuffer functions, for this reference to stay valid.
+ const ProfileChunkedBuffer& mBuffer;
+ const ProfileBufferChunk* mChunks0;
+ const ProfileBufferChunk* mChunks1;
+ };
+
+ // If in-session, call `aCallback(ProfileChunkedBuffer::Reader*)` with a valid
+ // reader and return its result. Callback should not store `Reader`, because
+ // it may become invalid after this call.
+ // If out-of-session, `aCallback` is invoked with a null `Reader*` instead.
+ template <typename Callback>
+ [[nodiscard]] auto Read(Callback&& aCallback) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session.
+ return std::forward<Callback>(aCallback)(static_cast<Reader*>(nullptr));
+ }
+ return mChunkManager->PeekExtantReleasedChunks(
+ [&](const ProfileBufferChunk* aOldestChunk) {
+ Reader reader(*this, aOldestChunk, mCurrentChunk.get());
+ return std::forward<Callback>(aCallback)(&reader);
+ });
+ }
+
+ // Invoke `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])`
+ // on each entry, it must read or at least skip everything. Either/both chunk
+ // pointers may be null.
+ template <typename Callback>
+ static void ReadEach(const ProfileBufferChunk* aChunks0,
+ const ProfileBufferChunk* aChunks1,
+ Callback&& aCallback) {
+ static_assert(std::is_invocable_v<Callback, ProfileBufferEntryReader&> ||
+ std::is_invocable_v<Callback, ProfileBufferEntryReader&,
+ ProfileBufferBlockIndex>,
+ "ReadEach callback must take ProfileBufferEntryReader& and "
+ "optionally a ProfileBufferBlockIndex");
+ profiler::detail::InChunkPointer p{aChunks0, aChunks1};
+ while (!p.IsNull()) {
+ // The position right before an entry size *is* a block index.
+ const ProfileBufferBlockIndex blockIndex =
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ p.GlobalRangePosition());
+ Length entrySize = p.ReadEntrySize();
+ if (entrySize == 0) {
+ return;
+ }
+ ProfileBufferEntryReader entryReader = p.EntryReader(entrySize);
+ if (entryReader.RemainingBytes() == 0) {
+ return;
+ }
+ MOZ_ASSERT(entryReader.RemainingBytes() == entrySize);
+ if constexpr (std::is_invocable_v<Callback, ProfileBufferEntryReader&,
+ ProfileBufferBlockIndex>) {
+ aCallback(entryReader, blockIndex);
+ } else {
+ Unused << blockIndex;
+ aCallback(entryReader);
+ }
+ MOZ_ASSERT(entryReader.RemainingBytes() == 0);
+ }
+ }
+
+ // Invoke `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])`
+ // on each entry, it must read or at least skip everything.
+ template <typename Callback>
+ void ReadEach(Callback&& aCallback) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session.
+ return;
+ }
+ mChunkManager->PeekExtantReleasedChunks(
+ [&](const ProfileBufferChunk* aOldestChunk) {
+ ReadEach(aOldestChunk, mCurrentChunk.get(),
+ std::forward<Callback>(aCallback));
+ });
+ }
+
+ // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at
+ // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if
+ // out-of-session, or if that entry doesn't exist anymore, or if we've reached
+ // just past the last entry. Return whatever `aCallback` returns. Callback
+ // should not store `ProfileBufferEntryReader`, because it may become invalid
+ // after this call.
+ // Either/both chunk pointers may be null.
+ template <typename Callback>
+ [[nodiscard]] static auto ReadAt(ProfileBufferBlockIndex aMinimumBlockIndex,
+ const ProfileBufferChunk* aChunks0,
+ const ProfileBufferChunk* aChunks1,
+ Callback&& aCallback) {
+ static_assert(
+ std::is_invocable_v<Callback, Maybe<ProfileBufferEntryReader>&&>,
+ "ReadAt callback must take a Maybe<ProfileBufferEntryReader>&&");
+ Maybe<ProfileBufferEntryReader> maybeEntryReader;
+ if (profiler::detail::InChunkPointer p{aChunks0, aChunks1}; !p.IsNull()) {
+ // If the pointer position is before the given position, try to advance.
+ if (p.GlobalRangePosition() >=
+ aMinimumBlockIndex.ConvertToProfileBufferIndex() ||
+ p.AdvanceToGlobalRangePosition(
+ aMinimumBlockIndex.ConvertToProfileBufferIndex())) {
+ MOZ_ASSERT(p.GlobalRangePosition() >=
+ aMinimumBlockIndex.ConvertToProfileBufferIndex());
+ // Here we're pointing at the start of a block, try to read the entry
+ // size. (Entries cannot be empty, so 0 means failure.)
+ if (Length entrySize = p.ReadEntrySize(); entrySize != 0) {
+ maybeEntryReader.emplace(p.EntryReader(entrySize));
+ if (maybeEntryReader->RemainingBytes() == 0) {
+ // An empty entry reader means there was no complete block at the
+ // given index.
+ maybeEntryReader.reset();
+ } else {
+ MOZ_ASSERT(maybeEntryReader->RemainingBytes() == entrySize);
+ }
+ }
+ }
+ }
+#ifdef DEBUG
+ auto assertAllRead = MakeScopeExit([&]() {
+ MOZ_ASSERT(!maybeEntryReader || maybeEntryReader->RemainingBytes() == 0);
+ });
+#endif // DEBUG
+ return std::forward<Callback>(aCallback)(std::move(maybeEntryReader));
+ }
+
+ // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at
+ // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if
+ // out-of-session, or if that entry doesn't exist anymore, or if we've reached
+ // just past the last entry. Return whatever `aCallback` returns. Callback
+ // should not store `ProfileBufferEntryReader`, because it may become invalid
+ // after this call.
+ template <typename Callback>
+ [[nodiscard]] auto ReadAt(ProfileBufferBlockIndex aBlockIndex,
+ Callback&& aCallback) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session.
+ return std::forward<Callback>(aCallback)(Nothing{});
+ }
+ return mChunkManager->PeekExtantReleasedChunks(
+ [&](const ProfileBufferChunk* aOldestChunk) {
+ return ReadAt(aBlockIndex, aOldestChunk, mCurrentChunk.get(),
+ std::forward<Callback>(aCallback));
+ });
+ }
+
+ // Append the contents of another ProfileChunkedBuffer to this one.
+ ProfileBufferBlockIndex AppendContents(const ProfileChunkedBuffer& aSrc) {
+ ProfileBufferBlockIndex firstBlockIndex;
+ // If we start failing, we'll stop writing.
+ bool failed = false;
+ aSrc.ReadEach([&](ProfileBufferEntryReader& aER) {
+ if (failed) {
+ return;
+ }
+ failed =
+ !Put(aER.RemainingBytes(), [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+ if (aEW.isNothing()) {
+ return false;
+ }
+ if (!firstBlockIndex) {
+ firstBlockIndex = aEW->CurrentBlockIndex();
+ }
+ aEW->WriteFromReader(aER, aER.RemainingBytes());
+ return true;
+ });
+ });
+ return failed ? nullptr : firstBlockIndex;
+ }
+
+#ifdef DEBUG
+ void Dump(std::FILE* aFile = stdout) const {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ fprintf(aFile,
+ "ProfileChunkedBuffer[%p] State: range %u-%u pushed=%u cleared=%u "
+ "(live=%u) failed-puts=%u bytes",
+ this, unsigned(mRangeStart), unsigned(mRangeEnd),
+ unsigned(mPushedBlockCount), unsigned(mClearedBlockCount),
+ unsigned(mPushedBlockCount) - unsigned(mClearedBlockCount),
+ unsigned(mFailedPutBytes));
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ fprintf(aFile, " - Out-of-session\n");
+ return;
+ }
+ fprintf(aFile, " - chunks:\n");
+ bool hasChunks = false;
+ mChunkManager->PeekExtantReleasedChunks(
+ [&](const ProfileBufferChunk* aOldestChunk) {
+ for (const ProfileBufferChunk* chunk = aOldestChunk; chunk;
+ chunk = chunk->GetNext()) {
+ fprintf(aFile, "R ");
+ chunk->Dump(aFile);
+ hasChunks = true;
+ }
+ });
+ if (mCurrentChunk) {
+ fprintf(aFile, "C ");
+ mCurrentChunk->Dump(aFile);
+ hasChunks = true;
+ }
+ for (const ProfileBufferChunk* chunk = mNextChunks.get(); chunk;
+ chunk = chunk->GetNext()) {
+ fprintf(aFile, "N ");
+ chunk->Dump(aFile);
+ hasChunks = true;
+ }
+ switch (mRequestedChunkHolder->GetState()) {
+ case RequestedChunkRefCountedHolder::State::Unused:
+ fprintf(aFile, " - No request pending.\n");
+ break;
+ case RequestedChunkRefCountedHolder::State::Requested:
+ fprintf(aFile, " - Request pending.\n");
+ break;
+ case RequestedChunkRefCountedHolder::State::Fulfilled:
+ fprintf(aFile, " - Request fulfilled.\n");
+ break;
+ }
+ if (!hasChunks) {
+ fprintf(aFile, " No chunks.\n");
+ }
+ }
+#endif // DEBUG
+
+ private:
+ // Used to de/serialize a ProfileChunkedBuffer (e.g., containing a backtrace).
+ friend ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>;
+ friend ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer>;
+ friend ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>>;
+ friend ProfileBufferEntryReader::Deserializer<
+ UniquePtr<ProfileChunkedBuffer>>;
+
+ [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ResetChunkManager(
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock&) {
+ UniquePtr<ProfileBufferChunkManager> chunkManager;  // Stays null if out-of-session or the manager was not owned.
+ if (mChunkManager) {  // In-session: detach the manager and reset to an empty state.
+ mRequestedChunkHolder = nullptr;  // Drop our reference to the holder.
+ mChunkManager->ForgetUnreleasedChunks();
+#ifdef DEBUG
+ mChunkManager->DeregisteredFrom(this);
+#endif
+ mChunkManager = nullptr;
+ chunkManager = std::move(mOwnedChunkManager);  // Null unless we owned the manager.
+ if (mCurrentChunk) {
+ mCurrentChunk->MarkDone();
+ mCurrentChunk = nullptr;
+ }
+ mNextChunks = nullptr;
+ mNextChunkRangeStart = mRangeEnd;
+ mRangeStart = mRangeEnd;  // start == end: the buffer is now empty.
+ mPushedBlockCount = 0;
+ mClearedBlockCount = 0;
+ mFailedPutBytes = 0;
+ }
+ return chunkManager;  // Previously-owned manager, or null.
+ }
+
+ void SetChunkManager(
+ ProfileBufferChunkManager& aChunkManager,
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+ MOZ_ASSERT(!mChunkManager);  // Must be out-of-session when a manager is attached.
+ mChunkManager = &aChunkManager;
+#ifdef DEBUG
+ mChunkManager->RegisteredWith(this);
+#endif
+
+ mChunkManager->SetChunkDestroyedCallback(
+ [this](const ProfileBufferChunk& aChunk) {
+ for (;;) {  // Compare-exchange retry loop. NOTE(review): keeps spinning while mRangeStart > aChunk.RangeStart(); confirm chunks are always destroyed oldest-first.
+ ProfileBufferIndex rangeStart = mRangeStart;
+ if (MOZ_LIKELY(rangeStart <= aChunk.RangeStart())) {
+ if (MOZ_LIKELY(mRangeStart.compareExchange(
+ rangeStart,
+ aChunk.RangeStart() + aChunk.BufferBytes()))) {  // New start: just past the destroyed chunk.
+ break;
+ }
+ }
+ }
+ mClearedBlockCount += aChunk.BlockCount();  // The destroyed chunk's blocks are no longer live.
+ });
+
+ // We start with one chunk right away, and request a following one now
+ // so it should be available before the current chunk is full.
+ SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock);
+ mRequestedChunkHolder = MakeRefPtr<RequestedChunkRefCountedHolder>();
+ RequestChunk(aLock);
+ }
+
+ [[nodiscard]] size_t SizeOfExcludingThis(
+ MallocSizeOf aMallocSizeOf,
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock&) const {
+ if (MOZ_UNLIKELY(!mChunkManager)) {
+ // Out-of-session: nothing is measured, report 0.
+ return 0;
+ }
+ size_t size = mChunkManager->SizeOfIncludingThis(aMallocSizeOf);  // Start with the chunk manager.
+ if (mCurrentChunk) {
+ size += mCurrentChunk->SizeOfIncludingThis(aMallocSizeOf);  // Add the current chunk.
+ }
+ if (mNextChunks) {
+ size += mNextChunks->SizeOfIncludingThis(aMallocSizeOf);  // Add what mNextChunks reports.
+ }
+ return size;
+ }
+
+ void InitializeCurrentChunk(
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock&) {
+ MOZ_ASSERT(!!mCurrentChunk);  // Precondition: there must be a current chunk.
+ mCurrentChunk->SetRangeStart(mNextChunkRangeStart);  // Give the chunk the next free range start.
+ mNextChunkRangeStart += mCurrentChunk->BufferBytes();  // Advance past this chunk for the following one.
+ Unused << mCurrentChunk->ReserveInitialBlockAsTail(0);  // Tail size 0: no block spills over from a previous chunk.
+ }
+
+ void SetAndInitializeCurrentChunk(
+ UniquePtr<ProfileBufferChunk>&& aChunk,
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+ mCurrentChunk = std::move(aChunk);  // Takes ownership; aChunk may be null.
+ if (mCurrentChunk) {  // Only initialize when a chunk was actually provided.
+ InitializeCurrentChunk(aLock);
+ }
+ }
+
+ void RequestChunk(
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+ if (HandleRequestedChunk_IsPending(aLock)) {  // This call also consumes an already-fulfilled request, if any.
+ // There is already a pending request, don't start a new one.
+ return;
+ }
+
+ // Ensure the `RequestedChunkHolder` knows we're starting a request.
+ mRequestedChunkHolder->StartRequest();
+
+ // Request a chunk, the callback carries a `RefPtr` of the
+ // `RequestedChunkHolder`, so it's guaranteed to live until it's invoked,
+ // even if this `ProfileChunkedBuffer` changes its `ChunkManager` or is
+ // destroyed.
+ mChunkManager->RequestChunk(
+ [requestedChunkHolder = RefPtr<RequestedChunkRefCountedHolder>(
+ mRequestedChunkHolder)](UniquePtr<ProfileBufferChunk> aChunk) {
+ requestedChunkHolder->AddRequestedChunk(std::move(aChunk));  // Hand the chunk (possibly null) to the holder.
+ });
+ }
+
+ [[nodiscard]] bool HandleRequestedChunk_IsPending(
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+ MOZ_ASSERT(!!mChunkManager);
+ MOZ_ASSERT(!!mRequestedChunkHolder);
+
+ if (mRequestedChunkHolder->GetState() ==
+ RequestedChunkRefCountedHolder::State::Unused) {
+ return false;  // No request was started, so nothing is pending.
+ }
+
+ // A request is either in-flight or fulfilled.
+ Maybe<UniquePtr<ProfileBufferChunk>> maybeChunk =
+ mRequestedChunkHolder->GetChunkIfFulfilled();
+ if (maybeChunk.isNothing()) {
+ // Request is still pending.
+ return true;
+ }
+
+ // Since we extracted the provided chunk, the holder should now be unused.
+ MOZ_ASSERT(mRequestedChunkHolder->GetState() ==
+ RequestedChunkRefCountedHolder::State::Unused);
+
+ // Request has been fulfilled (the provided chunk may still be null).
+ UniquePtr<ProfileBufferChunk>& chunk = *maybeChunk;
+ if (chunk) {
+ // Try to use as current chunk if needed.
+ if (!mCurrentChunk) {
+ SetAndInitializeCurrentChunk(std::move(chunk), aLock);
+ // We've just received a chunk and made it current, request a next chunk
+ // for later.
+ MOZ_ASSERT(!mNextChunks);
+ RequestChunk(aLock);
+ return true;  // Report the request just started above as pending.
+ }
+
+ if (!mNextChunks) {
+ mNextChunks = std::move(chunk);
+ } else {
+ mNextChunks->InsertNext(std::move(chunk));
+ }
+ }
+
+ return false;  // The request was consumed; nothing is pending anymore.
+ }
+
+ // Get a pointer to the current chunk, trying to obtain one if there is none; may return nullptr.
+ [[nodiscard]] ProfileBufferChunk* GetOrCreateCurrentChunk(
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+ ProfileBufferChunk* current = mCurrentChunk.get();
+ if (MOZ_UNLIKELY(!current)) {
+ // No current chunk ready.
+ MOZ_ASSERT(!mNextChunks,
+ "There shouldn't be next chunks when there is no current one");
+ // See if a request has recently been fulfilled, ignore pending status.
+ Unused << HandleRequestedChunk_IsPending(aLock);
+ current = mCurrentChunk.get();
+ if (MOZ_UNLIKELY(!current)) {
+ // There was no pending chunk, try to get one right now.
+ // This may still fail, but we can't do anything else about it, the
+ // caller must handle the nullptr case.
+ // Note: no chunk request is started on this path.
+ SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock);
+ current = mCurrentChunk.get();
+ }
+ }
+ return current;
+ }
+
+ // Get a pointer to the next chunk (requires a current chunk), trying to obtain one if needed; may return nullptr.
+ [[nodiscard]] ProfileBufferChunk* GetOrCreateNextChunk(
+ const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+ MOZ_ASSERT(!!mCurrentChunk,
+ "Why ask for a next chunk when there isn't even a current one?");
+ ProfileBufferChunk* next = mNextChunks.get();
+ if (MOZ_UNLIKELY(!next)) {
+ // No next chunk ready, see if a request has recently been fulfilled,
+ // ignore pending status.
+ Unused << HandleRequestedChunk_IsPending(aLock);
+ next = mNextChunks.get();
+ if (MOZ_UNLIKELY(!next)) {
+ // There was no pending chunk, try to get one right now.
+ mNextChunks = mChunkManager->GetChunk();
+ next = mNextChunks.get();
+ // This may still fail, but we can't do anything else about it, the
+ // caller must handle the nullptr case.
+ if (MOZ_UNLIKELY(!next)) {
+ // Attempt a request for later.
+ RequestChunk(aLock);
+ }
+ }
+ }
+ return next;
+ }
+
+ // Reserve a block of `aCallbackBlockBytes()` size, and invoke and return
+ // `aCallback(Maybe<ProfileBufferEntryWriter>&)`. Note that this is the "raw"
+ // version that doesn't write the entry size at the beginning of the block.
+ // Note: `aCallbackBlockBytes` is a callback instead of a simple value, to
+ // delay this potentially-expensive computation until after we've checked that
+ // we're in-session; use `Put(Length, Callback)` below if you know the size
+ // already.
+ template <typename CallbackBlockBytes, typename Callback>
+ auto ReserveAndPutRaw(CallbackBlockBytes&& aCallbackBlockBytes,
+ Callback&& aCallback,
+ baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock,
+ uint64_t aBlockCount = 1) {
+ // The entry writer that will point into one or two chunks to write
+ // into, empty by default (failure).
+ Maybe<ProfileBufferEntryWriter> maybeEntryWriter;
+
+ // The current chunk will be filled if we need to write more than its
+ // remaining space.
+ bool currentChunkFilled = false;
+
+ // If the current chunk gets filled, we may or may not initialize the next
+ // chunk!
+ bool nextChunkInitialized = false;
+
+ if (MOZ_LIKELY(mChunkManager)) {
+ // In-session.
+
+ const Length blockBytes =
+ std::forward<CallbackBlockBytes>(aCallbackBlockBytes)();
+
+ if (ProfileBufferChunk* current = GetOrCreateCurrentChunk(aLock);
+ MOZ_LIKELY(current)) {
+ if (blockBytes <= current->RemainingBytes()) {
+ // Block fits in current chunk with only one span.
+ currentChunkFilled = blockBytes == current->RemainingBytes();
+ const auto [mem0, blockIndex] = current->ReserveBlock(blockBytes);
+ MOZ_ASSERT(mem0.LengthBytes() == blockBytes);
+ maybeEntryWriter.emplace(
+ mem0, blockIndex,
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ blockIndex.ConvertToProfileBufferIndex() + blockBytes));
+ MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
+ mRangeEnd += blockBytes;
+ mPushedBlockCount += aBlockCount;
+ } else if (blockBytes >= current->BufferBytes()) {
+ // Currently only two buffer chunks are held at a time and it is not
+ // possible to write an object that takes up more space than this. In
+ // this scenario, silently discard this block of data if it is unable
+ // to fit into the two reserved profiler chunks.
+ mFailedPutBytes += blockBytes;
+ } else {
+ // Block doesn't fit fully in current chunk, it needs to overflow into
+ // the next one.
+ // Whether or not we can write this entry, the current chunk is now
+ // considered full, so it will be released. (Otherwise we could refuse
+ // this entry, but later accept a smaller entry into this chunk, which
+ // would be somewhat inconsistent.)
+ currentChunkFilled = true;
+ // Make sure the next chunk is available (from a previous request),
+ // otherwise create one on the spot.
+ if (ProfileBufferChunk* next = GetOrCreateNextChunk(aLock);
+ MOZ_LIKELY(next)) {
+ // Here, we know we have a current and a next chunk.
+ // Reserve head of block at the end of the current chunk.
+ const auto [mem0, blockIndex] =
+ current->ReserveBlock(current->RemainingBytes());
+ MOZ_ASSERT(mem0.LengthBytes() < blockBytes);
+ MOZ_ASSERT(current->RemainingBytes() == 0);
+ // Set the next chunk range, and reserve the needed space for the
+ // tail of the block.
+ next->SetRangeStart(mNextChunkRangeStart);
+ mNextChunkRangeStart += next->BufferBytes();
+ const auto mem1 = next->ReserveInitialBlockAsTail(
+ blockBytes - mem0.LengthBytes());
+ MOZ_ASSERT(next->RemainingBytes() != 0);
+ nextChunkInitialized = true;
+ // Block is split in two spans.
+ maybeEntryWriter.emplace(
+ mem0, mem1, blockIndex,
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ blockIndex.ConvertToProfileBufferIndex() + blockBytes));
+ MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
+ mRangeEnd += blockBytes;
+ mPushedBlockCount += aBlockCount;
+ } else {
+ // Cannot get a new chunk. Record put failure.
+ mFailedPutBytes += blockBytes;
+ }
+ }
+ } else {
+ // Cannot get a current chunk. Record put failure.
+ mFailedPutBytes += blockBytes;
+ }
+ } // end of `if (MOZ_LIKELY(mChunkManager))`
+
+ // Here, we either have a `Nothing` (failure), or a non-empty entry writer
+ // pointing at the start of the block.
+
+ // After we invoke the callback and return, we may need to handle the
+ // current chunk being filled.
+ auto handleFilledChunk = MakeScopeExit([&]() {
+ // If the entry writer was not already empty, the callback *must* have
+ // filled the full entry.
+ MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);
+
+ if (currentChunkFilled) {
+ // Extract current (now filled) chunk.
+ UniquePtr<ProfileBufferChunk> filled = std::move(mCurrentChunk);
+
+ if (mNextChunks) {
+ // Cycle to the next chunk.
+ mCurrentChunk =
+ std::exchange(mNextChunks, mNextChunks->ReleaseNext());
+
+ // Make sure it is initialized (it is now the current chunk).
+ if (!nextChunkInitialized) {
+ InitializeCurrentChunk(aLock);
+ }
+ }
+
+ // And finally mark filled chunk done and release it.
+ filled->MarkDone();
+ mChunkManager->ReleaseChunk(std::move(filled));
+
+ // Request another chunk if needed.
+ // In most cases, here we should have one current chunk and no next
+ // chunk, so we want to do a request so there hopefully will be a next
+ // chunk available when the current one gets filled.
+ // But we also do a request if we don't even have a current chunk (if
+ // it's too late, it's ok because the next `ReserveAndPutRaw` will just
+ // allocate one on the spot.)
+ // And if we already have a next chunk, there's no need for more now.
+ if (!mCurrentChunk || !mNextChunks) {
+ RequestChunk(aLock);
+ }
+ }
+ });
+
+ return std::forward<Callback>(aCallback)(maybeEntryWriter);
+ }
+
+ // Reserve a block of `aBlockBytes` size, and invoke and return
+ // `aCallback(Maybe<ProfileBufferEntryWriter>&)`. Note that this is the "raw"
+ // version that doesn't write the entry size at the beginning of the block; it takes the lock itself.
+ template <typename Callback>
+ auto ReserveAndPutRaw(Length aBlockBytes, Callback&& aCallback,
+ uint64_t aBlockCount) {
+ baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+ return ReserveAndPutRaw([aBlockBytes]() { return aBlockBytes; },  // Wrap the known size as the size callback.
+ std::forward<Callback>(aCallback), lock,
+ aBlockCount);
+ }
+
+ // Mutex guarding the following members.
+ mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;
+
+ // Pointer to the current Chunk Manager (or null when out-of-session.)
+ // It may be owned locally (see below) or externally.
+ ProfileBufferChunkManager* mChunkManager = nullptr;
+
+ // Only non-null when we own the current Chunk Manager.
+ UniquePtr<ProfileBufferChunkManager> mOwnedChunkManager;
+
+ UniquePtr<ProfileBufferChunk> mCurrentChunk;
+
+ UniquePtr<ProfileBufferChunk> mNextChunks;
+
+ // Class used to transfer requested chunks from a `ChunkManager` to a
+ // `ProfileChunkedBuffer`.
+ // It needs to be ref-counted because the request may be fulfilled
+ // asynchronously, and either side may be destroyed during the request.
+ // It cannot use the `ProfileChunkedBuffer` mutex, because that buffer and its
+ // mutex could be destroyed during the request.
+ class RequestedChunkRefCountedHolder {
+ public:
+ enum class State { Unused, Requested, Fulfilled };
+
+ // Get the current state. Note that it may change after the function
+ // returns, so it should be used carefully, e.g., `ProfileChunkedBuffer` can
+ // see if a request is pending or fulfilled, to avoid starting another
+ // request.
+ [[nodiscard]] State GetState() const {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
+ return mState;
+ }
+
+ // Must be called by `ProfileChunkedBuffer` when it requests a chunk.
+ // There cannot be more than one request in-flight.
+ void StartRequest() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
+ MOZ_ASSERT(mState == State::Unused, "Already requested or fulfilled");
+ mState = State::Requested;
+ }
+
+ // Must be called by the `ChunkManager` with a chunk.
+ // If the `ChunkManager` cannot provide a chunk (because of memory limits,
+ // or it gets destroyed), it must call this anyway with a nullptr.
+ void AddRequestedChunk(UniquePtr<ProfileBufferChunk>&& aChunk) {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
+ MOZ_ASSERT(mState == State::Requested);
+ mState = State::Fulfilled;
+ mRequestedChunk = std::move(aChunk);
+ }
+
+ // The `ProfileChunkedBuffer` can try to extract the provided chunk after a
+ // request (extracting a fulfilled request resets the state to Unused):
+ // - Nothing -> Request is not fulfilled yet.
+ // - Some(nullptr) -> The `ChunkManager` was not able to provide a chunk.
+ // - Some(chunk) -> Requested chunk.
+ [[nodiscard]] Maybe<UniquePtr<ProfileBufferChunk>> GetChunkIfFulfilled() {
+ Maybe<UniquePtr<ProfileBufferChunk>> maybeChunk;
+ baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
+ MOZ_ASSERT(mState == State::Requested || mState == State::Fulfilled);
+ if (mState == State::Fulfilled) {
+ mState = State::Unused;
+ maybeChunk.emplace(std::move(mRequestedChunk));
+ }
+ return maybeChunk;
+ }
+
+ // Ref-counting implementation. Hand-rolled, because mozilla::RefCounted
+ // logs AddRefs and Releases in xpcom, but this object could be AddRef'd
+ // by the Base Profiler before xpcom starts, then Release'd by the Gecko
+ // Profiler in xpcom, leading to apparent negative leaks.
+
+ void AddRef() {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
+ ++mRefCount;
+ }
+
+ void Release() {
+ {
+ baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
+ if (--mRefCount > 0) {
+ return;
+ }
+ }
+ delete this;  // Last reference gone; done after the lock scope, as the mutex is a member of this object.
+ }
+
+ private:
+ ~RequestedChunkRefCountedHolder() = default;  // Private: destruction only happens through Release().
+
+ // Mutex guarding the following members.
+ mutable baseprofiler::detail::BaseProfilerMutex mRequestMutex;
+ int mRefCount = 0;
+ State mState = State::Unused;
+ UniquePtr<ProfileBufferChunk> mRequestedChunk;
+ };
+
+ // Requested-chunk holder, kept alive when in-session, but may also live
+ // longer if a request is in-flight.
+ RefPtr<RequestedChunkRefCountedHolder> mRequestedChunkHolder;
+
+ // Range start of the next chunk to become current. Starting at 1 because
+ // 0 is a reserved index similar to nullptr.
+ ProfileBufferIndex mNextChunkRangeStart = 1;
+
+ // Index to the first block.
+ // Atomic because it may be increased when a Chunk is destroyed, and the
+ // callback may be invoked from anywhere, including from inside one of our
+ // locked sections, so we cannot protect it with a mutex.
+ Atomic<ProfileBufferIndex, MemoryOrdering::ReleaseAcquire> mRangeStart{1};
+
+ // Index past the last block. Equals mRangeStart if empty.
+ ProfileBufferIndex mRangeEnd = 1;
+
+ // Number of blocks that have been pushed into this buffer.
+ uint64_t mPushedBlockCount = 0;
+
+ // Number of blocks that have been removed from this buffer.
+ // Note: Live entries = pushed - cleared.
+ // Atomic because it may be updated when a Chunk is destroyed, and the
+ // callback may be invoked from anywhere, including from inside one of our
+ // locked sections, so we cannot protect it with a mutex.
+ Atomic<uint64_t, MemoryOrdering::ReleaseAcquire> mClearedBlockCount{0};
+
+ // Number of bytes that could not be put into this buffer.
+ uint64_t mFailedPutBytes = 0;
+};
+
+// ----------------------------------------------------------------------------
+// ProfileChunkedBuffer serialization
+
+// A ProfileChunkedBuffer can hide another one!
+// This will be used to store marker backtraces; They can be read back into a
+// UniquePtr<ProfileChunkedBuffer>.
+// Format: len (ULEB128) | start | buffer (len bytes) | pushed | cleared
+// len==0 marks an out-of-session buffer, or empty buffer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer> {
+ static Length Bytes(const ProfileChunkedBuffer& aBuffer) {
+ return aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ if (!aReader) {
+ // Out-of-session, we only need 1 byte to store a length of 0.
+ return ULEB128Size<Length>(0);
+ }
+ ProfileBufferEntryReader reader = aReader->SingleChunkDataAsEntry();
+ const ProfileBufferIndex start =
+ reader.CurrentBlockIndex().ConvertToProfileBufferIndex();
+ const ProfileBufferIndex end =
+ reader.NextBlockIndex().ConvertToProfileBufferIndex();
+ MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+ const Length len = static_cast<Length>(end - start);
+ if (len == 0) {
+ // In-session but empty, also store a length of 0.
+ return ULEB128Size<Length>(0);
+ }
+ // In-session: size of len + start + data + the two stats written by `Write`.
+ return static_cast<Length>(ULEB128Size(len) + sizeof(start) + len +
+ sizeof(aBuffer.mPushedBlockCount) +
+ sizeof(aBuffer.mClearedBlockCount));
+ });
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const ProfileChunkedBuffer& aBuffer) {
+ aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ if (!aReader) {
+ // Out-of-session, only store a length of 0.
+ aEW.WriteULEB128<Length>(0);
+ return;
+ }
+ ProfileBufferEntryReader reader = aReader->SingleChunkDataAsEntry();
+ const ProfileBufferIndex start =
+ reader.CurrentBlockIndex().ConvertToProfileBufferIndex();
+ const ProfileBufferIndex end =
+ reader.NextBlockIndex().ConvertToProfileBufferIndex();
+ MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+ const Length len = static_cast<Length>(end - start);
+ MOZ_ASSERT(len <= aEW.RemainingBytes());
+ if (len == 0) {
+ // In-session but empty, only store a length of 0.
+ aEW.WriteULEB128<Length>(0);
+ return;
+ }
+ // In-session.
+ // Store buffer length, and start index (the end index is not stored).
+ aEW.WriteULEB128(len);
+ aEW.WriteObject(start);
+ // Write all the bytes.
+ aEW.WriteFromReader(reader, reader.RemainingBytes());
+ // And write stats.
+ aEW.WriteObject(static_cast<uint64_t>(aBuffer.mPushedBlockCount));
+ aEW.WriteObject(static_cast<uint64_t>(aBuffer.mClearedBlockCount));
+ // Note: Failed pushes are not important to serialize.
+ });
+ }
+};
+
+// A serialized ProfileChunkedBuffer can be read into an empty buffer (either
+// out-of-session, or in-session with enough room).
+template <>
+struct ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer> {
+ static void ReadInto(ProfileBufferEntryReader& aER,
+ ProfileChunkedBuffer& aBuffer) {
+ // Expect an empty buffer, as we're going to overwrite it.
+ MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
+ // Read the stored buffer length.
+ const auto len = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
+ if (len == 0) {
+ // 0-length means an "uninteresting" buffer, just return now.
+ return;
+ }
+ // We have a non-empty buffer to read.
+
+ // Read the start index (only the start is read here, not the end).
+ const auto start = aER.ReadObject<ProfileBufferIndex>();
+ aBuffer.mRangeStart = start;
+ // For now, set the end to be the start (the buffer is still empty). It will
+ // be updated in `ReserveAndPutRaw()` below.
+ aBuffer.mRangeEnd = start;
+
+ if (aBuffer.IsInSession()) {
+ // Output buffer is in-session (i.e., it already has a memory buffer
+ // attached). Make sure the caller allocated enough space.
+ MOZ_RELEASE_ASSERT(aBuffer.BufferLength().value() >= len);
+ } else {
+ // Output buffer is out-of-session, set a new chunk manager that will
+ // provide a single chunk of just the right size.
+ aBuffer.SetChunkManager(MakeUnique<ProfileBufferChunkManagerSingle>(len));
+ MOZ_ASSERT(aBuffer.BufferLength().value() >= len);
+ }
+
+ // Copy bytes into the buffer.
+ aBuffer.ReserveAndPutRaw(
+ len,
+ [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+ MOZ_RELEASE_ASSERT(aEW.isSome());
+ aEW->WriteFromReader(aER, len);
+ },
+ 0);  // Block count 0: the pushed-block count is overwritten from the stream just below.
+ // Finally copy stats.
+ aBuffer.mPushedBlockCount = aER.ReadObject<uint64_t>();
+ aBuffer.mClearedBlockCount = aER.ReadObject<uint64_t>();
+ // Failed puts are not important to keep.
+ aBuffer.mFailedPutBytes = 0;
+ }
+
+ // We cannot output a ProfileChunkedBuffer object (not copyable), use
+ // `ReadInto()` or `aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>()` instead.
+ static ProfileChunkedBuffer Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// A ProfileChunkedBuffer is usually referenced through a UniquePtr, for
+// convenience we support (de)serializing that UniquePtr directly.
+// This is compatible with the non-UniquePtr serialization above, with a null
+// pointer being treated like an out-of-session or empty buffer; and any of
+// these would be deserialized into a null pointer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>> {
+ static Length Bytes(const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
+ if (!aBufferUPtr) {
+ // Null pointer, treat it like an empty buffer, i.e., write length of 0.
+ return ULEB128Size<Length>(0);
+ }
+ // Otherwise measure the pointed-at ProfileChunkedBuffer (which could be
+ // out-of-session or empty.)
+ return SumBytes(*aBufferUPtr);
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
+ if (!aBufferUPtr) {
+ // Null pointer, treat it like an empty buffer, i.e., write length of 0.
+ aEW.WriteULEB128<Length>(0);
+ return;
+ }
+ // Otherwise write the pointed-at ProfileChunkedBuffer (which could be
+ // out-of-session or empty.)
+ aEW.WriteObject(*aBufferUPtr);
+ }
+};
+
+// Serialization of a raw pointer to ProfileChunkedBuffer.
+// Use Deserializer<UniquePtr<ProfileChunkedBuffer>> to read it back.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*> {
+ static Length Bytes(ProfileChunkedBuffer* aBufferUPtr) {  // Note: a raw pointer, despite the "UPtr" in the name.
+ if (!aBufferUPtr) {
+ // Null pointer, treat it like an empty buffer, i.e., write length of 0.
+ return ULEB128Size<Length>(0);
+ }
+ // Otherwise measure the pointed-at ProfileChunkedBuffer (which could be
+ // out-of-session or empty.)
+ return SumBytes(*aBufferUPtr);
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ ProfileChunkedBuffer* aBufferUPtr) {
+ if (!aBufferUPtr) {
+ // Null pointer, treat it like an empty buffer, i.e., write length of 0.
+ aEW.WriteULEB128<Length>(0);
+ return;
+ }
+ // Otherwise write the pointed-at ProfileChunkedBuffer (which could be
+ // out-of-session or empty.)
+ aEW.WriteObject(*aBufferUPtr);
+ }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<UniquePtr<ProfileChunkedBuffer>> {
+ static void ReadInto(ProfileBufferEntryReader& aER,
+ UniquePtr<ProfileChunkedBuffer>& aBuffer) {
+ aBuffer = Read(aER);  // Replaces whatever aBuffer previously owned.
+ }
+
+ static UniquePtr<ProfileChunkedBuffer> Read(ProfileBufferEntryReader& aER) {
+ UniquePtr<ProfileChunkedBuffer> bufferUPtr;
+ // Keep a copy of the reader before reading the length, so we can restart
+ // from here below.
+ ProfileBufferEntryReader readerBeforeLen = aER;
+ // Read the stored buffer length.
+ const auto len = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
+ if (len == 0) {
+ // 0-length means an "uninteresting" buffer, just return nullptr.
+ return bufferUPtr;
+ }
+ // We have a non-empty buffer.
+ // Allocate an empty ProfileChunkedBuffer without mutex.
+ bufferUPtr = MakeUnique<ProfileChunkedBuffer>(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex);
+ // Rewind the reader before the length and deserialize the contents, using
+ // the non-UniquePtr Deserializer.
+ aER = readerBeforeLen;
+ aER.ReadIntoObject(*bufferUPtr);
+ return bufferUPtr;
+ }
+};
+
+} // namespace mozilla
+
+#endif // ProfileChunkedBuffer_h
diff --git a/mozglue/baseprofiler/public/ProfileChunkedBufferDetail.h b/mozglue/baseprofiler/public/ProfileChunkedBufferDetail.h
new file mode 100644
index 0000000000..75e461cd40
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileChunkedBufferDetail.h
@@ -0,0 +1,401 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileChunkedBufferDetail_h
+#define ProfileChunkedBufferDetail_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Likely.h"
+#include "mozilla/ProfileBufferChunk.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+
+namespace mozilla::profiler::detail {
+
+// Internal accessor pointing at a position inside a chunk.
+// It can handle two groups of chunks (typically the extant chunks stored in
+// the store manager, and the current chunk).
+// The main operations are:
+// - ReadEntrySize() to read an entry size, 0 means failure.
+// - operator+=(Length) to skip a number of bytes.
+// - EntryReader() creates an entry reader at the current position for a given
+// size (it may fail with an empty reader), and skips the entry.
+// Note that there is no "past-the-end" position -- as soon as InChunkPointer
+// reaches the end, it becomes effectively null.
+class InChunkPointer {
+ public:
+ using Byte = ProfileBufferChunk::Byte;
+ using Length = ProfileBufferChunk::Length;
+
+ // Nullptr-like InChunkPointer, may be used as end iterator.
+ InChunkPointer()
+ : mChunk(nullptr), mNextChunkGroup(nullptr), mOffsetInChunk(0) {}
+
+  // Builds an InChunkPointer over one or two chunk groups and moves it to the
+  // block at `aBlockIndex`; the pointer ends up null if that block cannot be
+  // reached. Only pass *trusted* block index values to this constructor!
+  InChunkPointer(const ProfileBufferChunk* aChunk,
+                 const ProfileBufferChunk* aNextChunkGroup,
+                 ProfileBufferBlockIndex aBlockIndex)
+      : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) {
+    if (!mChunk && mNextChunkGroup) {
+      // The first group is empty, start directly in the second one.
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+    if (mChunk) {
+      // Start at the first block of the first chunk (or wherever that leads).
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else {
+      mOffsetInChunk = 0;
+    }
+
+    if (!AdvanceToGlobalRangePosition(aBlockIndex)) {
+      // The block is gone (or doesn't look valid), become null.
+      mChunk = nullptr;
+      mNextChunkGroup = nullptr;
+    }
+  }
+
+  // Builds an InChunkPointer over one or two chunk groups, positioned at the
+  // first block at or after `aIndex` (by default the first block, if any).
+  // This may be slow, so avoid using it too much.
+  InChunkPointer(const ProfileBufferChunk* aChunk,
+                 const ProfileBufferChunk* aNextChunkGroup,
+                 ProfileBufferIndex aIndex = ProfileBufferIndex(0))
+      : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) {
+    if (!mChunk && mNextChunkGroup) {
+      // The first group is empty, start directly in the second one.
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+    if (mChunk) {
+      // Start at the first block of the first chunk (or wherever that leads).
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else {
+      mOffsetInChunk = 0;
+    }
+
+    if (!AdvanceToGlobalRangePosition(aIndex)) {
+      // Nothing exists at or after that position anymore, become null.
+      mChunk = nullptr;
+      mNextChunkGroup = nullptr;
+    }
+  }
+
+  // Returns the current position in the global range, i.e. the chunk's range
+  // start plus the offset inside that chunk.
+  // Returns 0 when null (which includes having reached the end).
+  [[nodiscard]] ProfileBufferIndex GlobalRangePosition() const {
+    return IsNull() ? 0 : (mChunk->RangeStart() + mOffsetInChunk);
+  }
+
+  // Moves forward to the block located exactly at `aBlockIndex`. The index is
+  // expected to have been valid at some point, but it may be obsolete by now.
+  // A null index, or an index before the current position, leaves the pointer
+  // where it is; the result then tells whether that position looks valid.
+  // Returns false if the pointer is (or becomes) null, or if the destination
+  // doesn't look like a valid block. MOZ_ASSERTs if the index is invalid.
+  [[nodiscard]] bool AdvanceToGlobalRangePosition(
+      ProfileBufferBlockIndex aBlockIndex) {
+    if (IsNull()) {
+      // Already null, nowhere to go. (Acceptable, hence no assertion.)
+      return false;
+    }
+    if (!aBlockIndex ||
+        aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) {
+      // Either the special null position, or a position we are already past:
+      // stay here (assuming the current position was valid).
+      return ShouldPointAtValidBlock();
+    }
+    const auto target = aBlockIndex.ConvertToProfileBufferIndex();
+    // Walk the chunks until one contains the target in its written space.
+    while (target >= mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) {
+      GoToNextChunk();
+      if (IsNull()) {
+        return false;
+      }
+      // Pointing in the middle of a block is not allowed, so skip whatever
+      // block tail starts this chunk.
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      if (target < GlobalRangePosition()) {
+        // The target was inside the skipped tail, i.e. in-between blocks.
+        MOZ_ASSERT(false, "AdvanceToGlobalRangePosition - In-between blocks");
+        return false;
+      }
+    }
+    // The target lies in this chunk, jump straight to it.
+    mOffsetInChunk = target - mChunk->RangeStart();
+    return ShouldPointAtValidBlock();
+  }
+
+  // Moves forward, block by block, to the first block starting at or after
+  // the global range position `aPosition`.
+  // Position 0 means "stay here"; this succeeds if the pointer is not null.
+  // Returns false if the pointer is (or becomes) null along the way.
+  [[nodiscard]] bool AdvanceToGlobalRangePosition(
+      ProfileBufferIndex aPosition) {
+    if (aPosition == 0) {
+      return !IsNull();
+    }
+    while (true) {
+      const ProfileBufferIndex here = GlobalRangePosition();
+      if (here == 0) {
+        // A zero position means that the pointer is null.
+        return false;
+      }
+      if (here >= aPosition) {
+        // Already at or past the requested position.
+        return true;
+      }
+      const bool targetInThisChunk =
+          aPosition < mChunk->RangeStart() + mChunk->OffsetPastLastBlock();
+      if (targetInThisChunk) {
+        // Hop over blocks inside this chunk's written space.
+        for (;;) {
+          mOffsetInChunk += ReadEntrySize();
+          const bool leftWrittenSpace =
+              mOffsetInChunk >= mChunk->OffsetPastLastBlock();
+          if (leftWrittenSpace) {
+            // This can happen for the last block of the chunk, carry on with
+            // the next chunk.
+            break;
+          }
+          if (mChunk->RangeStart() + mOffsetInChunk >= aPosition) {
+            // This block starts at or after the requested position.
+            return true;
+          }
+        }
+      }
+      // The position is beyond this chunk.
+      GoToNextChunk();
+      if (IsNull()) {
+        return false;
+      }
+      // Pointing in the middle of a block is not allowed, so skip whatever
+      // block tail starts this chunk.
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+    }
+  }
+
+  // Returns the byte at the current position and steps over it. Stepping past
+  // the last written byte of the chunk triggers Adjust(), which may make this
+  // pointer null.
+  [[nodiscard]] Byte ReadByte() {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    const Byte current = mChunk->ByteAt(mOffsetInChunk);
+    ++mOffsetInChunk;
+    if (MOZ_UNLIKELY(mOffsetInChunk == mChunk->OffsetPastLastBlock())) {
+      Adjust();
+    }
+    return current;
+  }
+
+  // Reads a ULEB128-encoded entry size and leaves the pointer right after it.
+  // Returns 0 on failure (0-byte entries are not allowed): null pointer, end
+  // of chunks reached while reading, or size bigger than a chunk buffer.
+  // A non-zero result does not guarantee that this many bytes can actually be
+  // read! (EntryReader() below may gracefully fail.)
+  [[nodiscard]] Length ReadEntrySize() {
+    if (IsNull()) {
+      return 0;
+    }
+    ULEB128Reader<Length> sizeReader;
+    bool sizeIsComplete = false;
+    do {
+      sizeIsComplete = sizeReader.FeedByteIsComplete(ReadByte());
+      if (MOZ_UNLIKELY(IsNull())) {
+        // Ran out of chunks, so no entry can follow this size anyway.
+        return 0;
+      }
+    } while (MOZ_UNLIKELY(!sizeIsComplete));
+
+    if (MOZ_UNLIKELY(sizeReader.Value() > mChunk->BufferBytes())) {
+      // Entries larger than a chunk are not allowed.
+      return 0;
+    }
+    return sizeReader.Value();
+  }
+
+  // Skips `aLength` bytes, moving into following chunks as needed (this may
+  // make the pointer null). Must not be called on a null pointer.
+  InChunkPointer& operator+=(Length aLength) {
+    MOZ_ASSERT(!IsNull());
+    mOffsetInChunk = mOffsetInChunk + aLength;
+    Adjust();
+    return *this;
+  }
+
+  // Creates a reader over the `aLength` bytes of the entry starting at the
+  // current position (which should be right after the entry size), and moves
+  // this pointer past the entry. The entry may span two chunks.
+  // Returns an empty reader if the pointer is null, if `aLength` is 0, or if
+  // the second chunk needed by the entry is not available.
+  [[nodiscard]] ProfileBufferEntryReader EntryReader(Length aLength) {
+    if (aLength == 0 || IsNull()) {
+      return ProfileBufferEntryReader();
+    }
+
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+
+    // The current position is the entry itself, past the entry size.
+    const ProfileBufferIndex entryIndex = GlobalRangePosition();
+    // There must be room before the entry for its size (indices start at 1
+    // at least).
+    MOZ_ASSERT(entryIndex >= 1u + ULEB128Size(aLength));
+    // The block starts where the entry size was written.
+    const auto blockStartIndex = entryIndex - ULEB128Size(aLength);
+
+    const Length bytesLeftInChunk =
+        mChunk->OffsetPastLastBlock() - mOffsetInChunk;
+    Span<const Byte> firstPart = mChunk->BufferSpan().From(mOffsetInChunk);
+
+    if (aLength <= bytesLeftInChunk) {
+      // The whole entry lives in this chunk. Move to the end of the block,
+      // which may null this pointer if that was the end of all buffers.
+      *this += aLength;
+      return ProfileBufferEntryReader(
+          firstPart.To(aLength),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              blockStartIndex),
+          // End of the block (could be null for the last entry).
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              GlobalRangePosition()));
+    }
+
+    // The entry continues in the next chunk.
+    GoToNextChunk();
+    if (IsNull()) {
+      return ProfileBufferEntryReader();
+    }
+
+    // Grab the second chunk's bytes now, Adjust() below may change chunks.
+    Span<const Byte> secondPart = mChunk->BufferSpan();
+    const Length tail = aLength - bytesLeftInChunk;
+    MOZ_ASSERT(tail <= mChunk->BufferBytes());
+    MOZ_ASSERT(tail == mChunk->OffsetFirstBlock());
+    // Place the offset at the end of the block, then adjust, which may null
+    // this pointer if that was the end of all buffers.
+    mOffsetInChunk = tail;
+    Adjust();
+    return ProfileBufferEntryReader(
+        firstPart, secondPart.To(tail),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(blockStartIndex),
+        // End of the block (could be null for the last entry).
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            GlobalRangePosition()));
+  }
+
+  // A pointer without a current chunk is null.
+  [[nodiscard]] bool IsNull() const { return mChunk == nullptr; }
+
+  // Two null pointers are equal; otherwise both must designate the same chunk
+  // and the same offset in it.
+  [[nodiscard]] bool operator==(const InChunkPointer& aOther) const {
+    const bool thisIsNull = IsNull();
+    const bool otherIsNull = aOther.IsNull();
+    if (thisIsNull || otherIsNull) {
+      return thisIsNull && otherIsNull;
+    }
+    return mChunk == aOther.mChunk && mOffsetInChunk == aOther.mOffsetInChunk;
+  }
+
+  [[nodiscard]] bool operator!=(const InChunkPointer& aOther) const {
+    return !operator==(aOther);
+  }
+
+  // Returns the byte at the current position, without moving.
+  [[nodiscard]] Byte operator*() const {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    return mChunk->ByteAt(mOffsetInChunk);
+  }
+
+  // Moves one byte forward. After the last written byte of a chunk, this
+  // restarts at offset 0 of the next chunk (the pointer may become null).
+  InChunkPointer& operator++() {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    ++mOffsetInChunk;
+    if (MOZ_UNLIKELY(mOffsetInChunk == mChunk->OffsetPastLastBlock())) {
+      mOffsetInChunk = 0;
+      GoToNextChunk();
+      Adjust();
+    }
+    return *this;
+  }
+
+ private:
+  // Makes `mChunk` point at the chunk following the current one, falling back
+  // to the next chunk group once the current group is exhausted.
+  // `mChunk` becomes null if there is no further chunk, if the next chunk has
+  // a zero range start, or if it doesn't start right where the current
+  // chunk's buffer ends. `mOffsetInChunk` is left untouched.
+  void GoToNextChunk() {
+    MOZ_ASSERT(!IsNull());
+    const ProfileBufferIndex expectedNextRangeStart =
+        mChunk->RangeStart() + mChunk->BufferBytes();
+
+    const ProfileBufferChunk* const successor = mChunk->GetNext();
+    if (successor) {
+      mChunk = successor;
+    } else {
+      // End of the current chunk group, switch to the next group (which may
+      // be null too, especially on the 2nd try).
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+
+    const bool hasNoValidRangeStart = mChunk && mChunk->RangeStart() == 0;
+    if (hasNoValidRangeStart) {
+      // Assume there are only unused chunks from here on.
+      mChunk = nullptr;
+    }
+
+    MOZ_ASSERT(!mChunk || mChunk->RangeStart() == expectedNextRangeStart,
+               "We don't handle discontinuous buffers (yet)");
+    // Non-DEBUG fallback: Stop reading past discontinuities.
+    // (They should be rare, only happening on temporary OOMs.)
+    // TODO: Handle discontinuities (by skipping over incomplete blocks).
+    if (mChunk && mChunk->RangeStart() != expectedNextRangeStart) {
+      mChunk = nullptr;
+    }
+  }
+
+  // We want `InChunkPointer` to always point at a valid byte (or be null).
+  // After some operations, `mOffsetInChunk` may point past the end of the
+  // current `mChunk`, in which case we need to adjust our position to be inside
+  // the appropriate chunk. E.g., if we're 10 bytes after the end of the current
+  // chunk, we should end up at offset 10 in the next chunk.
+  // Note that we may "fall off" the last chunk and make this `InChunkPointer`
+  // effectively null.
+  void Adjust() {
+    while (mChunk && mOffsetInChunk >= mChunk->OffsetPastLastBlock()) {
+      // TODO: Try to adjust offset between chunks relative to mRangeStart
+      // differences. But we don't handle discontinuities yet.
+      if (mOffsetInChunk >= mChunk->BufferBytes()) {
+        // Past the whole chunk buffer: the next chunk's range starts right
+        // after this buffer, so carry over the overshoot beyond it.
+        mOffsetInChunk -= mChunk->BufferBytes();
+      } else {
+        // Past the written blocks but still inside the buffer: carry over the
+        // overshoot beyond the last block. (Subtracting `BufferBytes()` here
+        // would wrap the unsigned offset around.)
+        mOffsetInChunk -= mChunk->OffsetPastLastBlock();
+      }
+      GoToNextChunk();
+    }
+  }
+
+  // Tells whether the current position plausibly is the start of a valid
+  // block: its size must be readable and reasonable, and the block's last
+  // byte must still be inside the buffer.
+  // Failures MOZ_ASSERT, to catch incorrect uses of block indices (which
+  // should only point at valid blocks if still in range); non-asserting
+  // builds get `false` instead and must handle it.
+  [[nodiscard]] bool ShouldPointAtValidBlock() const {
+    if (IsNull()) {
+      // No blocks at a null position.
+      MOZ_ASSERT(false, "ShouldPointAtValidBlock - null pointer");
+      return false;
+    }
+    // Work on a copy, `*this` must not move.
+    InChunkPointer pointer = *this;
+    const Length blockSize = pointer.ReadEntrySize();
+    if (blockSize == 0) {
+      // Zero means that we read 0 or a way-too-big value.
+      MOZ_ASSERT(false, "ShouldPointAtValidBlock - invalid size");
+      return false;
+    }
+    // Go to the last byte of the entry, and check that it still exists.
+    pointer += blockSize - 1;
+    MOZ_ASSERT(!pointer.IsNull(),
+               "ShouldPointAtValidBlock - past end of buffer");
+    return !pointer.IsNull();
+  }
+
+ const ProfileBufferChunk* mChunk;
+ const ProfileBufferChunk* mNextChunkGroup;
+ Length mOffsetInChunk;
+};
+
+} // namespace mozilla::profiler::detail
+
+#endif // ProfileChunkedBufferDetail_h
diff --git a/mozglue/baseprofiler/public/ProfilerBufferSize.h b/mozglue/baseprofiler/public/ProfilerBufferSize.h
new file mode 100644
index 0000000000..d77d869a68
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfilerBufferSize.h
@@ -0,0 +1,60 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerBufferSize_h
+#define ProfilerBufferSize_h
+
+#include "mozilla/ProfileBufferChunkManager.h"
+
+// We need to decide how many chunks of what size we want to fit in the given
+// total maximum capacity for this process, in the (likely) context of
+// multiple processes doing the same choice and having an inter-process
+// mechanism to control the overall memory limit.
+
+// The buffer size is provided as a number of "entries", this is their size in
+// bytes.
+constexpr static uint32_t scBytesPerEntry = 8;
+
+// Minimum chunk size allowed: twice the expected maximum stack size.
+constexpr static uint32_t scMinimumChunkSize =
+ 2 * mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize;
+
+// Ideally we want at least 2 unreleased chunks to work with (1 current and 1
+// next), and 2 released chunks (so that one can be recycled when old, leaving
+// one with some data).
+constexpr static uint32_t scMinimumNumberOfChunks = 4;
+
+// And we want to limit chunks to a maximum size, which is a compromise
+// between:
+// - A big size, which helps with reducing the rate of allocations and IPCs.
+// - A small size, which helps with equalizing the duration of recorded data
+// (as the inter-process controller will discard the oldest chunks in all
+// Firefox processes).
+constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
+
+// Lower limit of 128MiB, as a smaller buffer size usually isn't enough.
+constexpr static uint32_t scMinimumBufferSize = 128u * 1024u * 1024u;
+// Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
+// https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java
+constexpr static uint32_t scMinimumBufferEntries =
+ scMinimumBufferSize / scBytesPerEntry;
+
+// Upper limit of 2GiB.
+constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
+constexpr static uint32_t scMaximumBufferEntries =
+ scMaximumBufferSize / scBytesPerEntry;
+
+// Clamps a requested number of entries into the allowed range
+// [scMinimumBufferEntries, scMaximumBufferEntries].
+constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
+  return (aEntries <= scMinimumBufferEntries)   ? scMinimumBufferEntries
+         : (aEntries >= scMaximumBufferEntries) ? scMaximumBufferEntries
+                                                : aEntries;
+}
+
+#endif // ProfilerBufferSize_h
diff --git a/mozglue/baseprofiler/public/ProgressLogger.h b/mozglue/baseprofiler/public/ProgressLogger.h
new file mode 100644
index 0000000000..e15095faaa
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProgressLogger.h
@@ -0,0 +1,500 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProgressLogger_h
+#define ProgressLogger_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/ProportionValue.h"
+#include "mozilla/RefCounted.h"
+#include "mozilla/RefPtr.h"
+
+#include <atomic>
+
+// Uncomment to printf ProgressLogger updates.
+// #define DEBUG_PROCESSLOGGER
+
+#ifdef DEBUG_PROCESSLOGGER
+# include "mozilla/BaseProfilerUtils.h"
+# include <cstdio>
+#endif // DEBUG_PROCESSLOGGER
+
+namespace mozilla {
+
+// A `ProgressLogger` is used to update a referenced atomic `ProportionValue`,
+// and can recursively create a sub-logger corresponding to a subset of their
+// own range, but that sub-logger's updates are done in its local 0%-100% range.
+// The typical usage is for multi-level tasks, where each level can estimate its
+// own work and the work delegated to a next-level function, without knowing how
+// this local work relates to the higher-level total work. See
+// `CreateSubLoggerFromTo` for details.
+// Note that this implementation is single-threaded, it does not support logging
+// progress from multiple threads at the same time.
+class ProgressLogger {
+ public:
+ // A RefPtr'd object of this class is used as the target of all
+ // ProgressLogger updates, and it may be shared to make these updates visible
+ // from other code in any thread.
+ class SharedProgress : public external::AtomicRefCounted<SharedProgress> {
+ public:
+ MOZ_DECLARE_REFCOUNTED_TYPENAME(SharedProgress)
+
+ SharedProgress() = default;
+
+ SharedProgress(const SharedProgress&) = delete;
+ SharedProgress& operator=(const SharedProgress&) = delete;
+
+ // This constant is used to indicate that an update may change the progress
+ // value, but should not modify the previously-recorded location.
+ static constexpr const char* NO_LOCATION_UPDATE = nullptr;
+
+ // Set the current progress and location, but the previous location is not
+ // overwritten if the new one is null or empty.
+ // The location and then the progress are atomically "released", so that all
+ // preceding writes on this thread will be visible to other threads reading
+ // these values; most importantly when reaching 100% progress, the reader
+ // can be confident that the location is final and the operation being
+ // watched has completed.
+ void SetProgress(
+ ProportionValue aProgress,
+ const char* aLocationOrNullEmptyToIgnore = NO_LOCATION_UPDATE) {
+ if (aLocationOrNullEmptyToIgnore &&
+ *aLocationOrNullEmptyToIgnore != '\0') {
+ mLastLocation.store(aLocationOrNullEmptyToIgnore,
+ std::memory_order_release);
+ }
+ mProgress.store(aProgress, std::memory_order_release);
+ }
+
+ // Read the current progress value. Atomically "acquired", so that writes
+ // from the thread that stored this value are all visible to the reader
+ // here; most importantly when reaching 100%, we can be confident that the
+ // location is final and the operation being watched has completed.
+ [[nodiscard]] ProportionValue Progress() const {
+ return mProgress.load(std::memory_order_acquire);
+ }
+
+ // Read the last recorded location (null if none yet). Atomically "acquired".
+ [[nodiscard]] const char* LastLocation() const {
+ return mLastLocation.load(std::memory_order_acquire);
+ }
+
+ private:
+ friend mozilla::detail::RefCounted<SharedProgress,
+ mozilla::detail::AtomicRefCount>;
+ ~SharedProgress() = default;
+
+ // Progress and last-known location.
+ // Beware that these two values are not strongly tied: Reading one then the
+ // other may give mismatched information; but it should be fine for
+ // informational usage.
+ // They are stored using atomic acquire-release ordering, to guarantee that
+ // when read, all writes preceding these values are visible.
+ std::atomic<ProportionValue> mProgress = ProportionValue{0.0};
+ std::atomic<const char*> mLastLocation = nullptr;
+ };
+
+  // Same as `SharedProgress::NO_LOCATION_UPDATE`: pass it as a location to
+  // update the progress value while keeping the previously-recorded location.
+  static constexpr const char* NO_LOCATION_UPDATE =
+      SharedProgress::NO_LOCATION_UPDATE;
+
+  // Default (empty) logger, without a shared progress target.
+  ProgressLogger() = default;
+
+  // Construct a top-level logger, starting at 0% and expected to end at 100%.
+  // The optional start and end locations are recorded at construction and at
+  // destruction respectively, unless they are null or empty.
+  explicit ProgressLogger(
+      RefPtr<SharedProgress> aGlobalProgressOrNull,
+      const char* aLocationOrNullEmptyToIgnoreAtStart = NO_LOCATION_UPDATE,
+      const char* aLocationOrNullEmptyToIgnoreAtEnd = NO_LOCATION_UPDATE)
+      : ProgressLogger{std::move(aGlobalProgressOrNull),
+                       /* Start */ ProportionValue{0.0},
+                       /* Multiplier */ ProportionValue{1.0},
+                       aLocationOrNullEmptyToIgnoreAtStart,
+                       aLocationOrNullEmptyToIgnoreAtEnd} {}
+
+  // Don't make copies, it would be confusing!
+  // TODO: Copies could one day be allowed to track multi-threaded work, but it
+  // is outside the scope of this implementation; Please update if needed.
+  ProgressLogger(const ProgressLogger&) = delete;
+  // Copy-assignment is deleted as well (only move-assignment is supported).
+  ProgressLogger& operator=(const ProgressLogger&) = delete;
+
+ // Move-construct is allowed, to return from CreateSubLoggerFromTo, and
+ // forward straight into a function. Note that moved-from ProgressLoggers must
+ // not be used anymore! Use `CreateSubLoggerFromTo` to pass a sub-logger to
+ // functions.
+ ProgressLogger(ProgressLogger&& aOther)
+ : mGlobalProgressOrNull(std::move(aOther.mGlobalProgressOrNull)),
+ mLocalStartInGlobalSpace(aOther.mLocalStartInGlobalSpace),
+ mLocalToGlobalMultiplier(aOther.mLocalToGlobalMultiplier),
+ mLocationAtDestruction(aOther.mLocationAtDestruction) {
+ aOther.MarkMovedFrom();
+#ifdef DEBUG_PROCESSLOGGER
+ if (mGlobalProgressOrNull) {
+ printf("[%d] Moved (staying globally at %.2f in [%.2f, %.2f])\n",
+ int(baseprofiler::profiler_current_process_id().ToNumber()),
+ GetGlobalProgress().ToDouble() * 100.0,
+ mLocalStartInGlobalSpace.ToDouble() * 100.0,
+ (mLocalStartInGlobalSpace + mLocalToGlobalMultiplier).ToDouble() *
+ 100.0);
+ }
+#endif // DEBUG_PROCESSLOGGER
+ }
+
+  // Move-assign. This may be useful when starting with a default (empty) logger
+  // and later assigning it a progress value to start updating.
+  // `aOther` is marked as moved-from and must not be used anymore.
+  // Self-assignment is a no-op; without this guard the logger would mark
+  // itself as moved-from.
+  ProgressLogger& operator=(ProgressLogger&& aOther) {
+    if (this == &aOther) {
+      return *this;
+    }
+    mGlobalProgressOrNull = std::move(aOther.mGlobalProgressOrNull);
+    mLocalStartInGlobalSpace = aOther.mLocalStartInGlobalSpace;
+    mLocalToGlobalMultiplier = aOther.mLocalToGlobalMultiplier;
+    mLocationAtDestruction = aOther.mLocationAtDestruction;
+    aOther.MarkMovedFrom();
+#ifdef DEBUG_PROCESSLOGGER
+    if (mGlobalProgressOrNull) {
+      printf("[%d] Re-assigned (globally at %.2f in [%.2f, %.2f])\n",
+             int(baseprofiler::profiler_current_process_id().ToNumber()),
+             GetGlobalProgress().ToDouble() * 100.0,
+             mLocalStartInGlobalSpace.ToDouble() * 100.0,
+             (mLocalStartInGlobalSpace + mLocalToGlobalMultiplier).ToDouble() *
+                 100.0);
+    }
+#endif  // DEBUG_PROCESSLOGGER
+    return *this;
+  }
+
+  // On destruction, the local progress is set to 100% (together with the
+  // destruction location), except for moved-from loggers. Loggers without a
+  // progress target go through SetLocalProgress too, which ignores them.
+  ~ProgressLogger() {
+    if (IsMovedFrom()) {
+      return;
+    }
+#ifdef DEBUG_PROCESSLOGGER
+    if (mGlobalProgressOrNull) {
+      printf("[%d] Destruction:\n",
+             int(baseprofiler::profiler_current_process_id().ToNumber()));
+    }
+#endif  // DEBUG_PROCESSLOGGER
+    SetLocalProgress(ProportionValue{1.0}, mLocationAtDestruction);
+  }
+
+  // Current progress in the global space, or an invalid value if this logger
+  // has no progress target.
+  [[nodiscard]] ProportionValue GetGlobalProgress() const {
+    if (!mGlobalProgressOrNull) {
+      return ProportionValue::MakeInvalid();
+    }
+    return mGlobalProgressOrNull->Progress();
+  }
+
+  // Last known global location, or null if this logger has no progress target
+  // (the recorded location itself may be null too).
+  [[nodiscard]] const char* GetLastGlobalLocation() const {
+    if (!mGlobalProgressOrNull) {
+      return nullptr;
+    }
+    return mGlobalProgressOrNull->LastLocation();
+  }
+
+  // Records `aLocalProgress` (in this logger's own 0%-100% space), converted
+  // to the global space, along with the location unless it is null or empty.
+  // Nothing happens without a progress target, or if the local-to-global
+  // multiplier is exactly zero.
+  void SetLocalProgress(ProportionValue aLocalProgress,
+                        const char* aLocationOrNullEmptyToIgnore) {
+    MOZ_ASSERT(!IsMovedFrom());
+    if (!mGlobalProgressOrNull || mLocalToGlobalMultiplier.IsExactlyZero()) {
+      return;
+    }
+    const ProportionValue globalProgress = LocalToGlobal(aLocalProgress);
+    mGlobalProgressOrNull->SetProgress(globalProgress,
+                                       aLocationOrNullEmptyToIgnore);
+#ifdef DEBUG_PROCESSLOGGER
+    const char* const printedLocation =
+        aLocationOrNullEmptyToIgnore ? aLocationOrNullEmptyToIgnore : "<null>";
+    printf("[%d] - local %.0f%% ~ global %.2f%% \"%s\"\n",
+           int(baseprofiler::profiler_current_process_id().ToNumber()),
+           aLocalProgress.ToDouble() * 100.0,
+           globalProgress.ToDouble() * 100.0, printedLocation);
+#endif  // DEBUG_PROCESSLOGGER
+  }
+
+  // Makes a sub-logger for a given part of this logger's local range.
+  // E.g.: `f(pl.CreateSubLoggerFromTo(0.2, "f...", 0.4, "f done"));` means
+  // that `f` is expected to account for the local range 0.2 (when starting)
+  // to 0.4 (when returning); `f` updates the logger it receives from 0.0 to
+  // 1.0 (in its own local space), which gets converted to 0.2-0.4 in the
+  // caller's space.
+  // Sub-loggers can be nested, each deeper level covering a smaller and
+  // smaller range of the global output.
+  // Without a progress target, an empty logger is returned. If either end of
+  // the range is invalid once converted to the global space, the sub-logger
+  // gets an invalid start and a zero multiplier.
+  [[nodiscard]] ProgressLogger CreateSubLoggerFromTo(
+      ProportionValue aSubStartInLocalSpace,
+      const char* aLocationOrNullEmptyToIgnoreAtStart,
+      ProportionValue aSubEndInLocalSpace,
+      const char* aLocationOrNullEmptyToIgnoreAtEnd = NO_LOCATION_UPDATE) {
+    MOZ_ASSERT(!IsMovedFrom());
+    if (!mGlobalProgressOrNull) {
+      return ProgressLogger{};
+    }
+    const ProportionValue globalStart = LocalToGlobal(aSubStartInLocalSpace);
+    const ProportionValue globalEnd = LocalToGlobal(aSubEndInLocalSpace);
+    const bool rangeIsValid = !globalStart.IsInvalid() && !globalEnd.IsInvalid();
+    if (!rangeIsValid) {
+      return ProgressLogger{mGlobalProgressOrNull,
+                            /* Start */ ProportionValue::MakeInvalid(),
+                            /* Multiplier */ ProportionValue{0.0},
+                            aLocationOrNullEmptyToIgnoreAtStart,
+                            aLocationOrNullEmptyToIgnoreAtEnd};
+    }
+#ifdef DEBUG_PROCESSLOGGER
+    if (mGlobalProgressOrNull) {
+      printf("[%d] * Sub: local [%.0f%%, %.0f%%] ~ global [%.2f%%, %.2f%%]\n",
+             int(baseprofiler::profiler_current_process_id().ToNumber()),
+             aSubStartInLocalSpace.ToDouble() * 100.0,
+             aSubEndInLocalSpace.ToDouble() * 100.0,
+             globalStart.ToDouble() * 100.0, globalEnd.ToDouble() * 100.0);
+    }
+#endif  // DEBUG_PROCESSLOGGER
+    return ProgressLogger{mGlobalProgressOrNull,
+                          /* Start */ globalStart,
+                          /* Multiplier */ globalEnd - globalStart,
+                          aLocationOrNullEmptyToIgnoreAtStart,
+                          aLocationOrNullEmptyToIgnoreAtEnd};
+  }
+
+  // Same as above, without a location recorded at the start.
+  [[nodiscard]] ProgressLogger CreateSubLoggerFromTo(
+      ProportionValue aSubStartInLocalSpace,
+      ProportionValue aSubEndInLocalSpace,
+      const char* aLocationOrNullEmptyToIgnoreAtEnd = NO_LOCATION_UPDATE) {
+    constexpr const char* noStartLocation = NO_LOCATION_UPDATE;
+    return CreateSubLoggerFromTo(aSubStartInLocalSpace, noStartLocation,
+                                 aSubEndInLocalSpace,
+                                 aLocationOrNullEmptyToIgnoreAtEnd);
+  }
+
+  // Makes a sub-logger that starts at the current global progress and ends at
+  // `aSubEndInLocalSpace` (in this logger's local space).
+  // Without a progress target, an empty logger is returned. If either end of
+  // the range is invalid in the global space, the sub-logger gets an invalid
+  // start and a zero multiplier.
+  [[nodiscard]] ProgressLogger CreateSubLoggerTo(
+      const char* aLocationOrNullEmptyToIgnoreAtStart,
+      ProportionValue aSubEndInLocalSpace,
+      const char* aLocationOrNullEmptyToIgnoreAtEnd = NO_LOCATION_UPDATE) {
+    MOZ_ASSERT(!IsMovedFrom());
+    if (!mGlobalProgressOrNull) {
+      return ProgressLogger{};
+    }
+    const ProportionValue globalStart = GetGlobalProgress();
+    const ProportionValue globalEnd = LocalToGlobal(aSubEndInLocalSpace);
+    const bool rangeIsValid = !globalStart.IsInvalid() && !globalEnd.IsInvalid();
+    if (!rangeIsValid) {
+      return ProgressLogger{mGlobalProgressOrNull,
+                            /* Start */ ProportionValue::MakeInvalid(),
+                            /* Multiplier */ ProportionValue{0.0},
+                            aLocationOrNullEmptyToIgnoreAtStart,
+                            aLocationOrNullEmptyToIgnoreAtEnd};
+    }
+#ifdef DEBUG_PROCESSLOGGER
+    if (mGlobalProgressOrNull) {
+      printf("[%d] * Sub: local [(here), %.0f%%] ~ global [%.2f%%, %.2f%%]\n",
+             int(baseprofiler::profiler_current_process_id().ToNumber()),
+             aSubEndInLocalSpace.ToDouble() * 100.0,
+             globalStart.ToDouble() * 100.0, globalEnd.ToDouble() * 100.0);
+    }
+#endif  // DEBUG_PROCESSLOGGER
+    return ProgressLogger{mGlobalProgressOrNull,
+                          /* Start */ globalStart,
+                          /* Multiplier */ globalEnd - globalStart,
+                          aLocationOrNullEmptyToIgnoreAtStart,
+                          aLocationOrNullEmptyToIgnoreAtEnd};
+  }
+
+  // Same as above, without a location recorded at the start.
+  [[nodiscard]] ProgressLogger CreateSubLoggerTo(
+      ProportionValue aSubEndInLocalSpace,
+      const char* aLocationOrNullEmptyToIgnoreAtEnd = NO_LOCATION_UPDATE) {
+    constexpr const char* noStartLocation = NO_LOCATION_UPDATE;
+    return CreateSubLoggerTo(noStartLocation, aSubEndInLocalSpace,
+                             aLocationOrNullEmptyToIgnoreAtEnd);
+  }
+
+ class IndexAndProgressLoggerRange;
+
+ [[nodiscard]] inline IndexAndProgressLoggerRange CreateLoopSubLoggersFromTo(
+ ProportionValue aLoopStartInLocalSpace,
+ ProportionValue aLoopEndInLocalSpace, uint32_t aLoopCount,
+ const char* aLocationOrNullEmptyToIgnoreAtEdges =
+ ProgressLogger::NO_LOCATION_UPDATE);
+ [[nodiscard]] inline IndexAndProgressLoggerRange CreateLoopSubLoggersTo(
+ ProportionValue aLoopEndInLocalSpace, uint32_t aLoopCount,
+ const char* aLocationOrNullEmptyToIgnoreAtEdges =
+ ProgressLogger::NO_LOCATION_UPDATE);
+
+ private:
+ // All constructions start at the local 0%.
+ ProgressLogger(RefPtr<SharedProgress> aGlobalProgressOrNull,
+ ProportionValue aLocalStartInGlobalSpace,
+ ProportionValue aLocalToGlobalMultiplier,
+ const char* aLocationOrNullEmptyToIgnoreAtConstruction,
+ const char* aLocationOrNullEmptyToIgnoreAtDestruction)
+ : mGlobalProgressOrNull(std::move(aGlobalProgressOrNull)),
+ mLocalStartInGlobalSpace(aLocalStartInGlobalSpace),
+ mLocalToGlobalMultiplier(aLocalToGlobalMultiplier),
+ mLocationAtDestruction(aLocationOrNullEmptyToIgnoreAtDestruction) {
+ MOZ_ASSERT(!IsMovedFrom(), "Don't construct a moved-from object!");
+ SetLocalProgress(ProportionValue{0.0},
+ aLocationOrNullEmptyToIgnoreAtConstruction);
+ }
+
+  // The moved-from state is encoded as an invalid multiplier.
+  void MarkMovedFrom() {
+    mLocalToGlobalMultiplier = ProportionValue::MakeInvalid();
+  }
+  [[nodiscard]] bool IsMovedFrom() const {
+    return mLocalToGlobalMultiplier.IsInvalid();
+  }
+
+  // Converts a local progress value to the global space: scale it by the
+  // multiplier, then offset it by the local start.
+  [[nodiscard]] ProportionValue LocalToGlobal(
+      ProportionValue aLocalProgress) const {
+    const auto scaledProgress = aLocalProgress * mLocalToGlobalMultiplier;
+    return scaledProgress + mLocalStartInGlobalSpace;
+  }
+
+ // Global progress value to update from local changes.
+ RefPtr<SharedProgress> mGlobalProgressOrNull;
+
+ // How much to multiply and add to a local [0, 100%] value, to get the
+ // corresponding value in the global space.
+ // If mLocalToGlobalMultiplier is invalid, this ProgressLogger is moved-from,
+ // functions should not be used, and destructor won't update progress.
+ ProportionValue mLocalStartInGlobalSpace;
+ ProportionValue mLocalToGlobalMultiplier;
+
+ const char* mLocationAtDestruction = nullptr;
+};
+
+// Helper class for range-for loops. Its constructor is private (only
+// `ProgressLogger` is a friend), so ranges are obtained through
+// `ProgressLogger::CreateLoopSubLoggersFromTo()` or
+// `ProgressLogger::CreateLoopSubLoggersTo()`, e.g., with `aProgressLogger`:
+//   for (auto [index, loopProgressLogger] :
+//        aProgressLogger.CreateLoopSubLoggersFromTo(30_pc, 50_pc, 10,
+//                                                   "looping...")) {
+//     // This will loop 10 times.
+//     // `index` is the loop index, from 0 to 9.
+//     // The overall loop will start at 30% and end at 50% of aProgressLogger.
+//     // `loopProgressLogger` is the progress logger for each iteration,
+//     // covering 1/10th of the range, therefore: [30%,32%], then [32%,34%],
+//     // etc. until [48%,50%].
+//     // Progress is automatically updated before/after each loop.
+//   }
+// Note that this implementation is single-threaded, it does not support logging
+// progress from parallel loops.
+class ProgressLogger::IndexAndProgressLoggerRange {
+ public:
+  // Value produced when dereferencing the iterator: the current loop index,
+  // and a logger covering that iteration's slice of the loop range.
+  struct IndexAndProgressLogger {
+    uint32_t index;
+    ProgressLogger progressLogger;
+  };
+
+  // End sentinel: it only carries the index at which iteration stops.
+  class IndexAndProgressLoggerEndIterator {
+   public:
+    explicit IndexAndProgressLoggerEndIterator(uint32_t aIndex)
+        : mIndex(aIndex) {}
+
+    [[nodiscard]] uint32_t Index() const { return mIndex; }
+
+   private:
+    uint32_t mIndex;
+  };
+
+  // Iterator walking the loop range one increment at a time, and reporting
+  // the start of each step to the shared progress (when there is one).
+  class IndexAndProgressLoggerIterator {
+   public:
+    // Starts at index 0 and immediately reports the loop start position.
+    IndexAndProgressLoggerIterator(
+        RefPtr<ProgressLogger::SharedProgress> aGlobalProgressOrNull,
+        ProportionValue aLoopStartInGlobalSpace,
+        ProportionValue aLoopIncrementInGlobalSpace,
+        const char* aLocationOrNullEmptyToIgnoreAtEdges)
+        // The pointer was received by value, so it can be moved into the
+        // member instead of being copied a second time.
+        : mGlobalProgressOrNull(std::move(aGlobalProgressOrNull)),
+          mLoopStartInGlobalSpace(aLoopStartInGlobalSpace),
+          mLoopIncrementInGlobalSpace(aLoopIncrementInGlobalSpace),
+          mIndex(0u),
+          mLocationOrNullEmptyToIgnoreAtEdges(
+              aLocationOrNullEmptyToIgnoreAtEdges) {
+      if (mGlobalProgressOrNull) {
+        mGlobalProgressOrNull->SetProgress(mLoopStartInGlobalSpace,
+                                           mLocationOrNullEmptyToIgnoreAtEdges);
+      }
+    }
+
+    // Returns the current index together with a sub-logger spanning one
+    // increment from the current position. Without a shared progress, a
+    // default-constructed logger is returned instead.
+    [[nodiscard]] IndexAndProgressLogger operator*() {
+      return IndexAndProgressLogger{
+          mIndex,
+          mGlobalProgressOrNull
+              ? ProgressLogger{mGlobalProgressOrNull, mLoopStartInGlobalSpace,
+                               mLoopIncrementInGlobalSpace,
+                               ProgressLogger::NO_LOCATION_UPDATE,
+                               ProgressLogger::NO_LOCATION_UPDATE}
+              : ProgressLogger{}};
+    }
+
+    // Only the indices are compared; positions in the global space are not.
+    [[nodiscard]] bool operator!=(
+        const IndexAndProgressLoggerEndIterator& aEnd) const {
+      return mIndex != aEnd.Index();
+    }
+
+    // Moves to the next step, and reports its start position (which is also
+    // the end position of the step that just finished).
+    IndexAndProgressLoggerIterator& operator++() {
+      ++mIndex;
+      mLoopStartInGlobalSpace =
+          mLoopStartInGlobalSpace + mLoopIncrementInGlobalSpace;
+      if (mGlobalProgressOrNull) {
+        mGlobalProgressOrNull->SetProgress(mLoopStartInGlobalSpace,
+                                           mLocationOrNullEmptyToIgnoreAtEdges);
+      }
+      return *this;
+    }
+
+   private:
+    RefPtr<ProgressLogger::SharedProgress> mGlobalProgressOrNull;
+    ProportionValue mLoopStartInGlobalSpace;
+    ProportionValue mLoopIncrementInGlobalSpace;
+    uint32_t mIndex;
+    const char* mLocationOrNullEmptyToIgnoreAtEdges;
+  };
+
+  // Creating the begin iterator reports the loop start position.
+  [[nodiscard]] IndexAndProgressLoggerIterator begin() {
+    return IndexAndProgressLoggerIterator{
+        mGlobalProgressOrNull, mLoopStartInGlobalSpace,
+        mLoopIncrementInGlobalSpace, mLocationOrNullEmptyToIgnoreAtEdges};
+  }
+
+  [[nodiscard]] IndexAndProgressLoggerEndIterator end() {
+    return IndexAndProgressLoggerEndIterator{mLoopCount};
+  }
+
+ private:
+  friend class ProgressLogger;
+  // The per-iteration increment is the loop span divided by `aLoopCount`.
+  // With a zero count the begin and end indices are equal, so the loop body
+  // never runs and the increment is never used.
+  IndexAndProgressLoggerRange(ProgressLogger& aProgressLogger,
+                              ProportionValue aLoopStartInGlobalSpace,
+                              ProportionValue aLoopEndInGlobalSpace,
+                              uint32_t aLoopCount,
+                              const char* aLocationOrNullEmptyToIgnoreAtEdges =
+                                  ProgressLogger::NO_LOCATION_UPDATE)
+      : mGlobalProgressOrNull(aProgressLogger.mGlobalProgressOrNull),
+        mLoopStartInGlobalSpace(aLoopStartInGlobalSpace),
+        mLoopIncrementInGlobalSpace(
+            (aLoopEndInGlobalSpace - aLoopStartInGlobalSpace) / aLoopCount),
+        mLoopCount(aLoopCount),
+        mLocationOrNullEmptyToIgnoreAtEdges(
+            aLocationOrNullEmptyToIgnoreAtEdges) {}
+
+  RefPtr<ProgressLogger::SharedProgress> mGlobalProgressOrNull;
+  ProportionValue mLoopStartInGlobalSpace;
+  ProportionValue mLoopIncrementInGlobalSpace;
+  uint32_t mLoopCount;
+  const char* mLocationOrNullEmptyToIgnoreAtEdges;
+};
+
+// Builds a loop range of `aLoopCount` sub-loggers between two positions given
+// in this logger's local space; both bounds are converted to the global space
+// before being handed to the range.
+[[nodiscard]] ProgressLogger::IndexAndProgressLoggerRange
+ProgressLogger::CreateLoopSubLoggersFromTo(
+    ProportionValue aLoopStartInLocalSpace,
+    ProportionValue aLoopEndInLocalSpace, uint32_t aLoopCount,
+    const char* aLocationOrNullEmptyToIgnoreAtEdges) {
+  const ProportionValue loopStartInGlobalSpace =
+      LocalToGlobal(aLoopStartInLocalSpace);
+  const ProportionValue loopEndInGlobalSpace =
+      LocalToGlobal(aLoopEndInLocalSpace);
+  return IndexAndProgressLoggerRange{*this, loopStartInGlobalSpace,
+                                     loopEndInGlobalSpace, aLoopCount,
+                                     aLocationOrNullEmptyToIgnoreAtEdges};
+}
+
+// Builds a loop range of `aLoopCount` sub-loggers that starts at the value
+// returned by GetGlobalProgress() and ends at `aLoopEndInLocalSpace`
+// (converted to the global space).
+[[nodiscard]] ProgressLogger::IndexAndProgressLoggerRange
+ProgressLogger::CreateLoopSubLoggersTo(
+    ProportionValue aLoopEndInLocalSpace, uint32_t aLoopCount,
+    const char* aLocationOrNullEmptyToIgnoreAtEdges) {
+  // Read the start first, then convert the end, as in the argument order.
+  const auto loopStartInGlobalSpace = GetGlobalProgress();
+  const ProportionValue loopEndInGlobalSpace =
+      LocalToGlobal(aLoopEndInLocalSpace);
+  return IndexAndProgressLoggerRange{*this, loopStartInGlobalSpace,
+                                     loopEndInGlobalSpace, aLoopCount,
+                                     aLocationOrNullEmptyToIgnoreAtEdges};
+}
+
+} // namespace mozilla
+
+#endif // ProgressLogger_h
diff --git a/mozglue/baseprofiler/public/ProportionValue.h b/mozglue/baseprofiler/public/ProportionValue.h
new file mode 100644
index 0000000000..61eb2766ec
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProportionValue.h
@@ -0,0 +1,235 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProportionValue_h
+#define ProportionValue_h
+
+#include "mozilla/Attributes.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+
+namespace mozilla {
+
+// Class storing a proportion value between 0 and 1, effectively 0% to 100%.
+// The public interface deals with doubles, but internally the value is encoded
+// in an integral type, so arithmetic operations are fast.
+// It also supports an invalid value: Use MakeInvalid() to construct, it infects
+// any operation, and ToDouble() converts it to a signaling NaN.
+class ProportionValue {
+ public:
+  using UnderlyingType = uint32_t;
+
+  // Default-construct at 0%.
+  constexpr ProportionValue()
+      // This `noexcept` is necessary to avoid a build error when encapsulating
+      // `ProportionValue` in `std::atomic`:
+      // "use of deleted function
+      // 'constexpr std::atomic<mozilla::ProportionValue>::atomic()"
+      // because the default `std::atomic<T>::atomic()` constructor is marked:
+      // `noexcept(std::is_nothrow_default_constructible_v<T>)`
+      // and therefore this default constructor here must be explicitly marked
+      // `noexcept` as well.
+      noexcept
+      : mIntegralValue(0u) {}
+
+  // Construct a ProportionValue with the given value, clamped to 0..1.
+  // Note that it's constexpr, so construction from literal numbers should incur
+  // no runtime costs.
+  // If `aValue` is NaN, behavior is undefined! Use `MakeInvalid()` instead.
+  constexpr explicit ProportionValue(double aValue)
+      : mIntegralValue(UnderlyingType(std::clamp(aValue, 0.0, 1.0) * scMaxD)) {}
+
+  // Construct the special invalid value.
+  [[nodiscard]] static constexpr ProportionValue MakeInvalid() {
+    return ProportionValue(scInvalidU, Internal{});
+  }
+
+  // Convert back to a double in [0,1], or to a signaling NaN if invalid.
+  [[nodiscard]] constexpr double ToDouble() const {
+    return IsInvalid() ? std::numeric_limits<double>::signaling_NaN()
+                       : (double(mIntegralValue) * scInvMaxD);
+  }
+
+  // Retrieve the underlying integral value, for storage or testing purposes.
+  [[nodiscard]] constexpr UnderlyingType ToUnderlyingType() const {
+    return mIntegralValue;
+  }
+
+  // Re-construct a ProportionValue from an underlying integral value.
+  // Anything above the 100% encoding becomes the invalid value.
+  [[nodiscard]] static constexpr ProportionValue FromUnderlyingType(
+      UnderlyingType aUnderlyingType) {
+    return ProportionValue(
+        (aUnderlyingType <= scMaxU) ? aUnderlyingType : scInvalidU, Internal{});
+  }
+
+  [[nodiscard]] constexpr bool IsExactlyZero() const {
+    return mIntegralValue == 0u;
+  }
+
+  [[nodiscard]] constexpr bool IsExactlyOne() const {
+    return mIntegralValue == scMaxU;
+  }
+
+  [[nodiscard]] constexpr bool IsValid() const {
+    // Compare to the maximum value, not just exactly scInvalidU, to catch any
+    // kind of invalid state.
+    return mIntegralValue <= scMaxU;
+  }
+  [[nodiscard]] constexpr bool IsInvalid() const {
+    // Compare to the maximum value, not just exactly scInvalidU, to catch any
+    // kind of invalid state.
+    return mIntegralValue > scMaxU;
+  }
+
+  // Strict comparisons based on the underlying integral value. Use
+  // `CompareWithin` instead to make fuzzy comparisons.
+  // `ProportionValue::MakeInvalid()`s are equal, and greater than anything
+  // else; Best to avoid comparisons, and first use IsInvalid() instead.
+#define OPERATOR_COMPARISON(CMP)                                  \
+  [[nodiscard]] constexpr friend bool operator CMP(               \
+      const ProportionValue& aLHS, const ProportionValue& aRHS) { \
+    return aLHS.mIntegralValue CMP aRHS.mIntegralValue;           \
+  }
+  OPERATOR_COMPARISON(==)
+  OPERATOR_COMPARISON(!=)
+  OPERATOR_COMPARISON(<)
+  OPERATOR_COMPARISON(<=)
+  OPERATOR_COMPARISON(>)
+  OPERATOR_COMPARISON(>=)
+#undef OPERATOR_COMPARISON
+
+  // Arithmetic operations + - *, all working on the underlying integral values
+  // (i.e, no expensive floating-point operations are used), and always clamping
+  // to 0..1 range. Invalid values are poisonous.
+
+  [[nodiscard]] constexpr ProportionValue operator+(
+      ProportionValue aRHS) const {
+    return ProportionValue(
+        (IsInvalid() || aRHS.IsInvalid())
+            ? scInvalidU
+            // Adding fixed-point values keep the same scale, so there is no
+            // adjustment needed for that. [0,1]+[0,1]=[0,2], so we only need to
+            // ensure that the result is capped at max 1, aka scMaxU:
+            // a+b<=max <=> b<=max-a, so b is at maximum max-a.
+            : (mIntegralValue +
+               std::min(aRHS.mIntegralValue, scMaxU - mIntegralValue)),
+        Internal{});
+  }
+
+  [[nodiscard]] constexpr ProportionValue operator-(
+      ProportionValue aRHS) const {
+    return ProportionValue(
+        (IsInvalid() || aRHS.IsInvalid())
+            ? scInvalidU
+            // Subtracting fixed-point values keep the same scale, so there is
+            // no adjustment needed for that. [0,1]-[0,1]=[-1,1], so we only
+            // need to ensure that the value is positive:
+            // a-b>=0 <=> b<=a, so b is at maximum a.
+            : (mIntegralValue - std::min(aRHS.mIntegralValue, mIntegralValue)),
+        Internal{});
+  }
+
+  [[nodiscard]] constexpr ProportionValue operator*(
+      ProportionValue aRHS) const {
+    // Type to hold the full result of multiplying two maximum numbers.
+    using DoublePrecisionType = uint64_t;
+    static_assert(sizeof(DoublePrecisionType) >= 2 * sizeof(UnderlyingType));
+    return ProportionValue(
+        (IsInvalid() || aRHS.IsInvalid())
+            ? scInvalidU
+            // Multiplying fixed-point values doubles the scale (2^31 -> 2^62),
+            // so we need to adjust the result by dividing it by one scale
+            // (which is optimized into a binary right-shift).
+            : (UnderlyingType((DoublePrecisionType(mIntegralValue) *
+                               DoublePrecisionType(aRHS.mIntegralValue)) /
+                              DoublePrecisionType(scMaxU))),
+        Internal{});
+  }
+
+  // Explicitly forbid divisions, they make little sense, and would almost
+  // always return a clamped 100% (E.g.: 50% / 10% = 0.5 / 0.1 = 5 = 500%).
+  [[nodiscard]] constexpr ProportionValue operator/(
+      ProportionValue aRHS) const = delete;
+
+  // Division by a positive integer value, useful to split an interval in equal
+  // parts (with maybe some spare space at the end, because it is rounded down).
+  // Division by 0 produces an invalid value.
+  [[nodiscard]] constexpr ProportionValue operator/(uint32_t aDivisor) const {
+    return ProportionValue((IsInvalid() || aDivisor == 0u)
+                               ? scInvalidU
+                               : (mIntegralValue / aDivisor),
+                           Internal{});
+  }
+
+  // Multiplication by a positive integer value, useful as inverse of the
+  // integer division above. But it may be lossy because the division is rounded
+  // down, therefore: PV - u < (PV / u) * u <= PV.
+  // Clamped to 100% max. 0% multiplied by anything stays 0%.
+  [[nodiscard]] constexpr ProportionValue operator*(
+      uint32_t aMultiplier) const {
+    if (IsInvalid()) {
+      return ProportionValue(scInvalidU, Internal{});
+    }
+    if (mIntegralValue == 0u) {
+      // Must be handled before the overflow test below, which divides by
+      // mIntegralValue.
+      return ProportionValue(0u, Internal{});
+    }
+    // value * k > max <=> k > max / value (integer division), so results
+    // above 100% are detected without overflowing the multiplication.
+    return ProportionValue((aMultiplier > scMaxU / mIntegralValue)
+                               ? scMaxU
+                               : (mIntegralValue * aMultiplier),
+                           Internal{});
+  }
+
+ private:
+  // Tagged constructor for internal construction from the UnderlyingType, so
+  // that it is never ambiguously considered in constructions from one number.
+  struct Internal {};
+  constexpr ProportionValue(UnderlyingType aIntegralValue, Internal)
+      : mIntegralValue(aIntegralValue) {}
+
+  // Use all but 1 bit for the fractional part.
+  // Valid values can go from 0b0 (0%) up to 0b1000...00 (scMaxU aka 100%).
+  static constexpr unsigned scFractionalBits = sizeof(UnderlyingType) * 8 - 1;
+  // Maximum value corresponding to 1.0 or 100%.
+  static constexpr UnderlyingType scMaxU = UnderlyingType(1u)
+                                           << scFractionalBits;
+  // This maximum value corresponding to 1.0 can also be seen as the scaling
+  // factor from any [0,1] `double` value to the internal integral value.
+  static constexpr double scMaxD = double(scMaxU);
+  // The inverse can be used to convert the internal value back to [0,1].
+  static constexpr double scInvMaxD = 1.0 / scMaxD;
+
+  // Special value outside [0,max], used to construct invalid values.
+  static constexpr UnderlyingType scInvalidU = ~UnderlyingType(0u);
+
+  // Internal integral value, guaranteed to always be <= scMaxU, or scInvalidU.
+  // This is effectively a fixed-point value using 1 bit for the integer part
+  // and 31 bits for the fractional part.
+  // It is roughly equal to the `double` value [0,1] multiplied by scMaxD.
+  UnderlyingType mIntegralValue;
+};
+
+namespace literals {
+inline namespace ProportionValue_literals {
+
+// Integer percentage literal, e.g.: `10_pc`, `100_pc` (the same values as
+// `ProportionValue{0.1}` and `ProportionValue{1.0}`).
+// Values above 100 are clamped to 100_pc.
+[[nodiscard]] constexpr ProportionValue operator""_pc(
+    unsigned long long int aPercentage) {
+  // An unsigned value can only leave [0, 100] at the top, so capping at 100
+  // gives the same result as a two-sided clamp.
+  const unsigned long long int cappedPercentage =
+      std::min<unsigned long long int>(aPercentage, 100u);
+  return ProportionValue{double(cappedPercentage) / 100.0};
+}
+
+// Floating-point percentage literal, e.g.: `12.3_pc`, `100.0_pc` (the same
+// values as `ProportionValue{0.123}` and `ProportionValue{1.0}`).
+// Clamped to [0.0, 100.0]_pc.
+[[nodiscard]] constexpr ProportionValue operator""_pc(long double aPercentage) {
+  // Clamp in `long double` first, and only then narrow to `double`.
+  const long double clampedPercentage =
+      std::clamp<long double>(aPercentage, 0.0, 100.0);
+  return ProportionValue{double(clampedPercentage) / 100.0};
+}
+
+} // namespace ProportionValue_literals
+} // namespace literals
+
+} // namespace mozilla
+
+#endif // ProportionValue_h
diff --git a/mozglue/baseprofiler/public/leb128iterator.h b/mozglue/baseprofiler/public/leb128iterator.h
new file mode 100644
index 0000000000..636baf916f
--- /dev/null
+++ b/mozglue/baseprofiler/public/leb128iterator.h
@@ -0,0 +1,207 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// LEB128 utilities that can read/write unsigned LEB128 numbers from/to
+// iterators.
+//
+// LEB128 = Little Endian Base 128, where small numbers take few bytes, but
+// large numbers are still allowed, which is ideal when serializing numbers that
+// are likely to be small.
+// Each byte contains 7 bits from the number, starting at the "little end", the
+// top bit is 0 for the last byte, 1 otherwise.
+// Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc.
+//
+// Iterators only need to provide:
+// - `*it` to return a reference to the next byte to be read from or written to.
+// - `++it` to advance the iterator after a byte is read or written.
+//
+// The caller must always provide sufficient space to write any number, by:
+// - pre-allocating a large enough buffer, or
+// - allocating more space when `++it` reaches the end and/or `*it` is invoked
+// after the end, or
+// - moving the underlying pointer to an appropriate location (e.g., wrapping
+// around a circular buffer).
+// The caller must also provide enough bytes to read a full value (i.e., at
+// least one byte should have its top bit unset), and a type large enough to
+// hold the stored value.
+//
+// Note: There are insufficient checks for validity! These functions are
+// intended to be used together, i.e., the user should only `ReadULEB128()` from
+// a sufficiently-large buffer that the same user filled with `WriteULEB128()`.
+// Using with externally-sourced data (e.g., DWARF) is *not* recommended.
+//
+// https://en.wikipedia.org/wiki/LEB128
+
+#ifndef leb128iterator_h
+#define leb128iterator_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Likely.h"
+
+#include <climits>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+namespace mozilla {
+
+// Number of bytes needed to represent `aValue` in unsigned LEB128.
+template <typename T>
+constexpr uint_fast8_t ULEB128Size(T aValue) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ULEB128Size only takes unsigned types");
+  // Each output byte carries 7 bits of the value, so count how many groups of
+  // 7 bits must be dropped before nothing is left.
+  // The do-while shape makes sure that 0 still counts as one byte.
+  uint_fast8_t byteCount = 0;
+  do {
+    ++byteCount;
+    aValue >>= 7;
+    // Small values are expected, so running out of bits is the likely outcome.
+  } while (!MOZ_LIKELY(aValue == 0));
+  return byteCount;
+}
+
+// Largest number of bytes that any value of type `T` can take, i.e., the size
+// of the encoding of the biggest `T`.
+template <typename T>
+constexpr uint_fast8_t ULEB128MaxSize() {
+  constexpr T largestValue = std::numeric_limits<T>::max();
+  return ULEB128Size<T>(largestValue);
+}
+
+// Write `aValue` in unsigned LEB128 through `aIterator`.
+// On return the iterator has been moved past the last byte written.
+template <typename T, typename It>
+void WriteULEB128(T aValue, It& aIterator) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "WriteULEB128 only takes unsigned types");
+  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
+  static_assert(sizeof(IteratorValue) == 1,
+                "WriteULEB128 expects an iterator to single bytes");
+  // At least one byte is always output, even for 0, hence the do-while.
+  do {
+    // Take the 7 least significant bits, and drop them from `aValue`.
+    const uint_fast8_t lowBits = aValue & 0x7Fu;
+    aValue >>= 7;
+    // The 8th bit signals that more bytes follow; it stays off for the last
+    // byte. Small values are expected, so the last byte is the likely case.
+    // The cast is explicit because uint_fast8_t may be wider than one byte,
+    // and the compiler could otherwise warn about narrowing down to whatever
+    // single-byte type `*aIterator` expects.
+    *aIterator = static_cast<IteratorValue>(
+        MOZ_LIKELY(aValue == 0) ? lowBits : (lowBits | 0x80u));
+    // Always step to the next byte, including after the last one.
+    ++aIterator;
+  } while (!MOZ_LIKELY(aValue == 0));
+}
+
+// Read an unsigned LEB128 value through `aIterator`.
+// On return the iterator has been moved past the last byte read.
+template <typename T, typename It>
+T ReadULEB128(It& aIterator) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ReadULEB128 must return an unsigned type");
+  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
+  static_assert(sizeof(IteratorValue) == 1,
+                "ReadULEB128 expects an iterator to single bytes");
+  // The value is assembled from its least significant bits upwards.
+  T value = 0;
+  uint_fast8_t bitOffset = 0;
+  while (true) {
+    // Fetch one byte and step over it. The cast is there in case
+    // IteratorValue does not convert implicitly; a sign extension would not
+    // matter since only the bottom 8 bits are looked at.
+    const uint_fast8_t current = static_cast<uint_fast8_t>(*aIterator);
+    ++aIterator;
+    // Put the 7 payload bits at their place in the result.
+    value |= static_cast<T>(current & 0x7fu) << bitOffset;
+    // A cleared 8th bit marks the last byte. Small values are expected, so
+    // this is the likely case.
+    const bool isLastByte = (current & 0x80u) == 0;
+    if (MOZ_LIKELY(isLastByte)) {
+      return value;
+    }
+    // More bytes follow, carrying the next more significant 7 bits.
+    bitOffset += 7;
+    // Shifting by the type size or more is Undefined Behavior in C++, so
+    // check that the next shift stays in range.
+    MOZ_ASSERT(bitOffset < CHAR_BIT * sizeof(T));
+  }
+}
+
+// constexpr ULEB128 parser that is fed one byte at a time.
+// Mostly useful when dealing with non-trivial byte feeds.
+template <typename T>
+class ULEB128Reader {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ULEB128Reader must handle an unsigned type");
+
+ public:
+  constexpr ULEB128Reader() = default;
+
+  // A stateful parser has no meaningful copy, so copying is disabled.
+  constexpr ULEB128Reader(const ULEB128Reader&) = delete;
+  constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete;
+
+  // Give the next byte to the parser.
+  // Returns true if that byte was the last one of the number, false if more
+  // bytes are needed. Must not be called once the value is complete.
+  [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) {
+    MOZ_ASSERT(!IsComplete());
+    // Put the 7 payload bits at their place in the value.
+    mValue |= static_cast<T>(aByte & 0x7fu) << mShift;
+    // A cleared 8th bit marks the last byte. Small values are expected, so
+    // this is the likely case.
+    const bool isLastByte = (aByte & 0x80u) == 0;
+    if (MOZ_LIKELY(isLastByte)) {
+      mShift = scCompleteShift;
+      return true;
+    }
+    // More bytes follow, carrying the next more significant 7 bits.
+    mShift += 7;
+    // Shifting by the type size or more is Undefined Behavior in C++, so
+    // check that the next shift stays in range.
+    MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T));
+    return false;
+  }
+
+  // Go back to the initial state: no bits received, value not complete.
+  constexpr void Reset() {
+    mValue = 0;
+    mShift = 0;
+  }
+
+  // True once the last byte of the number has been fed.
+  [[nodiscard]] constexpr bool IsComplete() const {
+    return mShift == scCompleteShift;
+  }
+
+  // The parsed number; only to be read once the value is complete.
+  [[nodiscard]] constexpr T Value() const {
+    MOZ_ASSERT(IsComplete());
+    return mValue;
+  }
+
+ private:
+  // Marker stored in `mShift` when parsing is complete.
+  constexpr static unsigned scCompleteShift = 0x10000u;
+
+  T mValue = 0;
+  unsigned mShift = 0;
+};
+
+} // namespace mozilla
+
+#endif // leb128iterator_h