summaryrefslogtreecommitdiffstats
path: root/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py')
-rw-r--r--python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py1286
1 files changed, 1286 insertions, 0 deletions
diff --git a/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py b/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py
new file mode 100644
index 0000000000..8163c05dc3
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py
@@ -0,0 +1,1286 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, # You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Utility package for working with moz.yaml files.
+#
+# Requires `pyyaml` and `voluptuous`
+# (both are in-tree under third_party/python)
+
+"""
+Problem:
+ ./mach vendor needs to be able to add or remove files from moz.build files automatically to
+ be able to effectively update a library automatically and send useful try runs in.
+
+ So far, it has been difficult to do that.
+
+ Why:
+ - Some files need to go into UNIFIED_SOURCES vs SOURCES
+ - Some files are os-specific, and need to go into per-OS conditionals
+ - Some files are both UNIFIED_SOURCES/SOURCES sensitive and OS-specific.
+
+Proposal:
+ Design an algorithm that maps a third party library file to a suspected moz.build location.
+ Run the algorithm on all files specified in all third party libraries' moz.build files.
+ See if the proposed place in the moz.build file matches the actual place.
+
+Initial Algorithm
+ Given a file, which includes the filename and the path from gecko root, we want to find the
+ correct moz.build file and location within that file.
+ Take the path of the file, and iterate up the directory tree, looking for moz.build files as
+ we go.
+ Consider each of these moz.build files, starting with the one closest to the file.
+ Within a moz.build file, identify the SOURCES or UNIFIED_SOURCES block(s) that contains a file
+ in the same directory path as the file to be added.
+ If there is only one such block, use that one.
+ If there are multiple blocks, look at the files within each block and note the longest length
+ of a common prefix (including partial filenames - if we just did full directories the
+ result would be the same as the prior step and we would not narrow the results down). Use
+ the block containing the longest prefix. (We call this 'guessing'.)
+
+Result of the proposal:
+ The initial implementation works on 1675 of 1977 eligible files.
+ The files it does not work on include:
+ - general failures. Such as when we find that avutil.cpp wants to be next to adler32.cpp
+ but avutil.cpp is in SOURCES and adler32.cpp is in UNIFIED_SOURCES. (And many similar
+ cases.)
+ - per-cpu-feature files, where only a single file is added under a conditional
+ - When guessing, because of a len(...) > longest_so_far comparison, we would prefer the
+ first block we found.
+ - Changing this to prefer UNIFIED_SOURCES in the event of a tie
+ yielded 17 additional correct assignments (about a 1% improvement)
+ - As a result of the change immediately above, when guessing, because given equal
+ prefixes, we would prefer a UNIFIED_SOURCES block over other blocks, even if the other
+ blocks are longer
+ - Changing this (again) to prefer the block containing more files yielded 49 additional
+ correct assignments (about a 2.5% improvement)
+
+ The files that are ineligible for consideration are:
+ - Those in libwebrtc
+ - Those specified in source assignments composed of generators (e.g. [f for f in '%.c'])
+ - Those specified in source assignments to subscripted variables
+ (e.g. SOURCES += foo['x86_files'])
+
+ We needed to iterate up the directory and look at a different moz.build file _zero_ times.
+ This indicates this code is probably not needed, and therefore we will remove it from the
+ algorithm.
+ We needed to guess based on the longest prefix 944 times, indicating that this code is
+ absolutely crucial and should be double-checked. (And indeed, upon double-checking it,
+ bugs were identified.)
+
+ After some initial testing, it was determined that this code completely fell down when the
+ vendoring directory differed from the moz.yaml directory (definitions below.) The code was
+ slightly refactored to handle this case, primarily by (a) re-inserting the logic to check
+ multiple moz.build files instead of the first and (b) handling some complicated normalization
+ notions (details in comments).
+
+Slightly Improved Algorithm Changes:
+ Don't bother iterating up the directory tree looking for moz.build files, just take the first.
+ When guessing, in the event of a common-prefix tie, prefer the block containing more files
+
+ With these changes, we now Successfully Matched 1724 of 1977 files
+
+CODE CONCEPTS
+
+source-assignment
+ An assignment of files to a SOURCES or UNIFIED_SOURCES variable, such as
+ SOURCES += ['ffpvx.cpp']
+
+ We specifically look only for these two variable names to avoid identifying things
+ such as CXX_FLAGS.
+
+ Sometimes; however, there is an intermediary variable, such as `SOURCES += celt_filenames`
+ In this situation we find the celt_filenames assignment, and treat it as a 'source-assignment'
+
+source-assignment-location
+ source-assignment-location is a human readable string that identifies where in the moz.build
+ file the source-assignment is. It can be used to visually match the location upon manual
+ inspection; and given a source-assignment-location, re-identify it when iterating over all
+ source-assignments in a file.
+
+ The actual string consists of the path from the root of the moz.build file to the
+ source-assignment, plus a suffix number.
+
+ We suffix the final value with an incrementing counter. This is to support moz.build files
+ that, for whatever reason, use multiple SOURCES += [] list in the same basic block. This index
+ is per-file, so no two assignments in the same file (even if they have separate locations)
+ should have the same suffix.
+
+ For example:
+
+ When `SOURCES += ['ffpvx.xpp']` appears as the first line of the file (or any other
+ unindented-location) its source-assignment-location will be `> SOURCES 1`.
+
+ When `SOURCES += ['ffpvx.xpp']` appears inside a conditional such as
+ `CONFIG['OS_TARGET'] == 'WINNT'` then its source-assignment-location will be
+ `> if CONFIG['OS_TARGET'] == 'WINNT' > SOURCES 1`
+
+ When SOURCES += ['ffpvx.xpp'] appears as the second line of the file, and a different
+ SOURCES += [] was the first line, then its source-assignment-location will be "> SOURCES 2".
+
+ No two source-assignments may have the same source-assignment-location. If they do, we raise
+ an assert.
+
+file vs filename
+ a 'filename' is a string specifying the name and sometimes the path of a file.
+ a 'file' is an object you get from open()-ing a filename
+
+ A variable that is a string should always use 'filename'
+
+vendoring directory vs moz.yaml directory
+ In many cases, a library's moz.yaml file, moz.build file(s), and sources files will all live
+ under a single directory. e.g. libjpeg
+
+ In other cases, a library's source files are in one directory (we call this the 'vendoring
+ directory') and the moz.yaml file and moz.build file(s) are in another directory (we call this
+ the moz.yaml directory). e.g. libdav1d
+
+normalized-filename
+ A filename is 'normalized' if it has been expanded to the full path from the gecko root. This
+ requires a moz.build file.
+
+ For example a filename `lib/opus.c` may be specified inside the `media/libopus/moz.build`
+ file. The filename is normalized by os.path.join()-ing the dirname of the moz.build file
+ (i.e. `media/libopus`) to the filename, resulting in `media/libopus/lib/opus.c`
+
+ A filename that begins with '/' is presumed to already be specified relative to the gecko
+ root, and therefore is not modified.
+
+ Normalization gets more complicated when dealing with separate vendoring and moz.yaml
+ directories. This is because a file can be considered normalized when it looks like
+ third_party/libdav1d/src/a.cpp
+ _or_ when it looks like
+ media/libdav1d/../../third_party/libdav1d/src/a.cpp
+ This is because in the moz.build file, it will be specified as
+ `../../third_party/libdav1d/src/a.cpp` and we 'normalize' it by prepending the path to the
+ moz.build file.
+
+ Normalization is not just about having an 'absolute' path from gecko_root to file. In fact
+ it's not really about that at all - it's about matching filenames. Therefore when we are
+ dealing with separate vendoring and moz.yaml directories we will very quickly 're-normalize'
+ a normalized filename to get it into one of those foo/bar/../../third_party/... paths that
+ will make sense for the moz.build file we are interested in.
+
+ Whenever a filename is normalized, it should be specified as such in the variable name,
+ either as a prefix (normalized_filename) or a suffix (target_filename_normalized)
+
+statistic
+ Using some hacky stuff, we report statistics about how many times we hit certain branches of
+ the code.
+ e.g.
+ - "How many times did we refine a guess based on prefix length"
+ - "How many times did we refine a guess based on the number of files in the block"
+ - "What is the histogram of guess candidates"
+
+ We do this to identify how frequently certain code paths were taken, allowing us to identify
+ strange behavior and investigate outliers. This process led to identifying bugs and small
+ improvements.
+"""
+
+import ast
+import copy
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pprint import pprint
+
# Use the implementation from mozbuild.frontend.sandbox when it can be imported;
# if that import fails for any reason, fall back to a local definition with the
# same name so the rest of this module can call alphabetical_sorted() either way.
try:
    from mozbuild.frontend.sandbox import alphabetical_sorted
except Exception:

    def alphabetical_sorted(iterable, key=lambda x: x.lower(), reverse=False):
        """Return `iterable` as a sorted list, ordered by `key` (lower-cased item by default)."""
        return sorted(iterable, key=key, reverse=reverse)
+
+
# This can be edited to enable better Python 3.8 behavior, but is set so that
# everything is consistent by default so errors can be detected more easily.
# While it is True, node_to_name() labels nodes by their class name instead of
# by their source text.
FORCE_DOWNGRADE_BEHAVIOR = True

# Module-wide counters recording how often certain code paths were taken (see the
# 'statistic' section of the module docstring). Three entries start as empty
# dictionaries and "needed_to_guess" starts as a plain integer count.
statistics = {
    "guess_candidates": {},
    "number_refinements": {},
    "needed_to_guess": 0,
    "length_logic": {},
}
+
+
def log(*args, **kwargs):
    """Debug-logging hook that is deliberately a no-op.

    It is helpful to keep some logging statements around, but we don't want to print them
    unless we are debugging. To see them, forward the arguments to print(*args, **kwargs)
    from here.
    """
+
+
+##############################################
+
+import inspect
+
+
def node_to_name(code, node):
    """
    Return a short label for an AST node, for use in a source-assignment-location.

    code: the source text that `node` was parsed from
    node: the ast node to label

    When FORCE_DOWNGRADE_BEHAVIOR is disabled and the interpreter is Python 3.8 or newer, the
    label is whatever ast.get_source_segment returns for the node. Otherwise it is the node's
    class name (e.g. 'Compare'), which does not depend on the Python version.
    """
    # Compare the version as a tuple: testing major and minor separately would wrongly
    # reject any future release whose minor number is below 8 (e.g. 4.0).
    if not FORCE_DOWNGRADE_BEHAVIOR and sys.version_info >= (3, 8):
        return ast.get_source_segment(code, node)

    return node.__class__.__name__
+
+
def get_attribute_label(node):
    """
    Return the dotted name of an attribute chain, e.g. 'EXPORTS.mozilla.dom'.

    node: an ast.Attribute node

    The chain is walked from the outermost attribute down to its base. The base must be either
    a plain name (`EXPORTS.foo`) or a subscript of a plain name (`SOURCES['a.c'].flags`, whose
    label is 'SOURCES.flags' - the subscript itself is not included in the label).

    Raises an Exception if the base of the chain is anything else (e.g. a call).
    """
    assert isinstance(node, ast.Attribute)

    # Collect the attribute names outermost-first, then reverse them into reading order.
    attribute_names = []
    subtarget = node
    while isinstance(subtarget, ast.Attribute):
        attribute_names.append(subtarget.attr)
        subtarget = subtarget.value
    label = ".".join(reversed(attribute_names))

    if isinstance(subtarget, ast.Name):
        return subtarget.id + "." + label
    if isinstance(subtarget, ast.Subscript) and isinstance(subtarget.value, ast.Name):
        return subtarget.value.id + "." + label

    # Report the node's type name; formatting the node itself only yields an opaque
    # '<ast.X object at 0x...>' string.
    raise Exception(
        "Unexpected subtarget of type %s found in get_attribute_label. label=%s"
        % (type(subtarget).__name__, label)
    )
+
+
def ast_get_source_segment(code, node):
    """
    Guarded stand-in for ast.get_source_segment.

    code: the source text that `node` was parsed from
    node: the ast node whose source text is wanted

    The behavior depends on who is calling:
    - If the calling frame's filename contains "sphinx", or FORCE_DOWNGRADE_BEHAVIOR is
      disabled on Python 3.8+, the call is forwarded to ast.original_get_source_segment.
    - Otherwise, if the calling function is named assignment_node_to_source_filename_list,
      an empty string is returned.
    - Otherwise an Exception is raised, naming the Python version and the calling function.

    Because the decision is made from the immediate caller's stack frame, wrapping a call to
    this function in a helper changes which branch is taken.
    """
    # Frame record of our immediate caller (index 0 would be this function itself).
    caller = inspect.stack()[1]

    if "sphinx" in caller.filename or (
        not FORCE_DOWNGRADE_BEHAVIOR
        and sys.version_info[0] >= 3
        and sys.version_info[1] >= 8
    ):
        # NOTE(review): this attribute is not part of the ast module; it is presumably the
        # one attached by the module-level monkeypatch - confirm it is always set here.
        return ast.original_get_source_segment(code, node)

    # This caller only uses the result inside log messages, so an empty string is harmless.
    if caller.function == "assignment_node_to_source_filename_list":
        return ""

    raise Exception(
        "ast_get_source_segment is not available with this Python version. (ver=%s.%s, caller=%s)"
        % (sys.version_info.major, sys.version_info.minor, caller.function)
    )
+
+
# Overwrite it so we don't accidentally use it.
# On Python 3.8+ the real function is kept reachable as ast.original_get_source_segment
# and ast.get_source_segment is replaced, for the whole process, with the guarded
# ast_get_source_segment wrapper.
if sys.version_info[0] >= 3 and sys.version_info[1] >= 8:
    ast.original_get_source_segment = ast.get_source_segment
    ast.get_source_segment = ast_get_source_segment
+
+
+##############################################
+
+
def node_to_readable_file_location(code, node, child_node=None):
    """
    Build the human readable path from the top of the moz.build file down to `node`.

    code: the source text of the moz.build file
    node: the ast node to describe; it and its ancestors must carry a `.parent` attribute
    child_node: the child we recursed up from. It is required when `node` is an `if`, so that
        the `if` body can be told apart from its `else` branch.

    Each level contributes " > " followed by a label, for example
        " > if Compare > SOURCES"

    Raises an Exception for a Module node or any node type without a known label.
    """
    # Everything above this node comes first; stop just below the Module root.
    if isinstance(node.parent, ast.Module):
        ancestors = ""
    else:
        ancestors = node_to_readable_file_location(code, node.parent, node)

    if isinstance(node, ast.Module):
        raise Exception("We shouldn't see a Module")

    if isinstance(node, ast.If):
        assert child_node
        keyword = "if " if child_node in node.body else "else-of-if "
        label = keyword + node_to_name(code, node.test)
    elif isinstance(node, ast.For):
        loop_variable = node_to_name(code, node.target)
        loop_iterable = node_to_name(code, node.iter)
        label = "for " + loop_variable + " in " + loop_iterable
    elif isinstance(node, (ast.AugAssign, ast.Assign)):
        if isinstance(node, ast.AugAssign):
            assigned_to = node.target
        else:
            # This assert would fire if we did e.g. some_sources = all_sources = [ ... ]
            assert len(node.targets) == 1, "Assignment node contains more than one target"
            assigned_to = node.targets[0]

        if isinstance(assigned_to, ast.Name):
            label = assigned_to.id
        else:
            label = node_to_name(code, assigned_to)
    else:
        raise Exception("Got a node type I don't know how to handle: " + str(node))

    return ancestors + " > " + label
+
+
def assignment_node_to_source_filename_list(code, node):
    """
    Return the list of filenames assigned by an assignment node.

    code: the source text of the moz.build file (only used for log messages)
    node: an ast.Assign or ast.AugAssign node

    If the list of filenames is not a list of constants (e.g. it's a generated list)
    it's (probably) infeasible to try and figure it out. At least we're not going to try
    right now. Maybe in the future?

    If this happens, we'll return an empty list. The consequence of this is that we
    won't be able to match a file against this list, so we may not be able to add it.

    (But if the file matches a generated list, perhaps it will be included in the
    Sources list automatically?)

    Raises an Exception if the assigned value is a kind of node we do not recognize.
    """
    # NOTE: ast_get_source_segment() decides what to do based on the name of its immediate
    # caller, so every call to it must stay directly in this function body (not in a helper
    # and not inside a comprehension).

    # String literals are ast.Constant nodes on Python 3.8+. ast.Str only matters for older
    # interpreters and is removed in Python 3.14, so it must not be referenced on new ones.
    if sys.version_info >= (3, 8):
        literal_types = (ast.Constant,)
    else:
        literal_types = (ast.Constant, ast.Str)

    value = node.value
    if isinstance(value, ast.List):
        for f in value.elts:
            if not isinstance(f, literal_types):
                log(
                    "Found non-constant source file name in list: ",
                    ast_get_source_segment(code, f),
                )
                return []
        return [f.value if isinstance(f, ast.Constant) else f.s for f in value.elts]
    elif isinstance(value, ast.ListComp):
        # SOURCES += [f for f in foo if blah]
        log("Could not find the files for " + ast_get_source_segment(code, value))
    elif isinstance(value, (ast.Name, ast.Subscript)):
        # SOURCES += other_var
        # SOURCES += files['X64_SOURCES']
        log("Could not find the files for " + ast_get_source_segment(code, node))
    elif isinstance(value, ast.Call):
        # SOURCES += sorted(...)
        log("Could not find the files for " + ast_get_source_segment(code, node))
    else:
        raise Exception(
            "Unexpected node received in assignment_node_to_source_filename_list: "
            + str(node)
        )
    return []
+
+
def mozbuild_file_to_source_assignments(normalized_mozbuild_filename, assignment_type):
    """
    Parse a moz.build file and collect the source-assignments it contains.

    normalized_mozbuild_filename: the moz.build file to read
    assignment_type: "source-files" collects `SOURCES +=` / `UNIFIED_SOURCES +=` assignments
        (and, for `SOURCES += some_variable`, the assignment of that variable instead). Any
        other value collects assignments to EXPORTS and to attributes of EXPORTS.

    Returns a tuple (source_assignments, root, code):
        source_assignments: dictionary of 'source-assignment-location' ->
            'normalized source filename list'. Assignments for which no filenames could be
            determined are omitted.
        root: the parsed ast.Module; every node below it has been given a `.parent` attribute
        code: the text of the moz.build file
    """
    source_assignments = {}

    # Parse the AST of the moz.build file. The context manager makes sure the file handle
    # is closed as soon as we have read it.
    with open(normalized_mozbuild_filename) as mozbuild_file:
        code = mozbuild_file.read()
    root = ast.parse(code)

    # Populate node parents. This allows us to walk up from a node to the root.
    # (Really I think python's ast class should do this, but it doesn't, so we monkey-patch it)
    for node in ast.walk(root):
        for child in ast.iter_child_nodes(node):
            child.parent = node

    if assignment_type == "source-files":
        # Find all the assignments of SOURCES or UNIFIED_SOURCES
        targets = ["SOURCES", "UNIFIED_SOURCES"]
        source_assignment_nodes = [
            node
            for node in ast.walk(root)
            if isinstance(node, ast.AugAssign)
            and isinstance(node.target, ast.Name)
            and node.target.id in targets
        ]
        assert (
            len([n for n in source_assignment_nodes if not isinstance(n.op, ast.Add)])
            == 0
        ), "We got a Source assignment that wasn't +="

        # Recurse and find nodes where we do SOURCES += other_var or SOURCES += FILES['foo']
        recursive_assignment_nodes = [
            node
            for node in source_assignment_nodes
            if isinstance(node.value, ast.Name) or isinstance(node.value, ast.Subscript)
        ]

        recursive_assignment_nodes_names = [
            node.value.id
            for node in recursive_assignment_nodes
            if isinstance(node.value, ast.Name)
        ]

        # TODO: We do not dig into subscript variables. These are currently only used by two
        # libraries that use external sources.mozbuild files.
        # recursive_assignment_nodes_names.extend([something<node> for node in
        #    recursive_assignment_nodes if isinstance(node.value, ast.Subscript)]

        additional_assignment_nodes = [
            node
            for node in ast.walk(root)
            if isinstance(node, ast.Assign)
            and isinstance(node.targets[0], ast.Name)
            and node.targets[0].id in recursive_assignment_nodes_names
        ]

        # Remove the original, useless assignment node (the SOURCES += other_var)
        for node in recursive_assignment_nodes:
            source_assignment_nodes.remove(node)
        # Add the other_var += [''] source-assignment
        source_assignment_nodes.extend(additional_assignment_nodes)
    else:
        # Find `EXPORTS += ...` and `EXPORTS.something += ...`
        source_assignment_nodes = [
            node
            for node in ast.walk(root)
            if isinstance(node, ast.AugAssign)
            and (
                (isinstance(node.target, ast.Name) and node.target.id == "EXPORTS")
                or (
                    isinstance(node.target, ast.Attribute)
                    and get_attribute_label(node.target).startswith("EXPORTS")
                )
            )
        ]
        # ... followed by `EXPORTS = ...` and `EXPORTS.something = ...`
        source_assignment_nodes.extend(
            [
                node
                for node in ast.walk(root)
                if isinstance(node, ast.Assign)
                and (
                    (
                        isinstance(node.targets[0], ast.Name)
                        and node.targets[0].id == "EXPORTS"
                    )
                    or (
                        isinstance(node.targets[0], ast.Attribute)
                        and get_attribute_label(node.targets[0]).startswith("EXPORTS")
                    )
                )
            ]
        )

    # Get the source-assignment-location for the node:
    assignment_index = 1
    for a in source_assignment_nodes:
        source_assignment_location = (
            node_to_readable_file_location(code, a) + " " + str(assignment_index)
        )
        source_filename_list = assignment_node_to_source_filename_list(code, a)

        if not source_filename_list:
            # In some cases (like generated source file lists) we will have an empty list.
            # If that is the case, just omit the source assignment
            continue

        normalized_source_filename_list = [
            normalize_filename(normalized_mozbuild_filename, f)
            for f in source_filename_list
        ]

        # If the suffixed location is somehow already taken, fall back to the unsuffixed
        # one; the assert below then catches a genuine duplicate.
        if source_assignment_location in source_assignments:
            source_assignment_location = node_to_readable_file_location(code, a)

        assert (
            source_assignment_location not in source_assignments
        ), "In %s, two assignments have the same key ('%s')" % (
            normalized_mozbuild_filename,
            source_assignment_location,
        )
        source_assignments[source_assignment_location] = normalized_source_filename_list
        # The index only advances for assignments we actually record.
        assignment_index += 1

    return (source_assignments, root, code)
+
+
def unnormalize_filename(normalized_mozbuild_filename, normalized_filename):
    """
    Undo normalize_filename(): make a normalized filename relative to its moz.build file.

    normalized_mozbuild_filename: path of the moz.build file, from the gecko root
    normalized_filename: the filename to convert

    A filename that begins with '/' is returned unchanged. Otherwise the moz.build file's
    directory is removed from the front of the filename. Only a leading occurrence is
    removed: a blanket str.replace() would also delete the same text appearing deeper in
    the path, and for a moz.build file in the root directory would delete every '/'.
    A filename that does not start with the moz.build directory is returned unchanged.
    """
    if normalized_filename.startswith("/"):
        return normalized_filename

    mozbuild_dir = os.path.dirname(normalized_mozbuild_filename).replace(
        os.path.sep, "/"
    )
    if not mozbuild_dir:
        # The moz.build file is in the root directory; there is nothing to strip.
        return normalized_filename

    mozbuild_path = mozbuild_dir + "/"
    if normalized_filename.startswith(mozbuild_path):
        return normalized_filename[len(mozbuild_path) :]
    return normalized_filename
+
+
def normalize_filename(normalized_mozbuild_filename, filename):
    """
    Expand a filename found in a moz.build file to its path from the gecko root.

    normalized_mozbuild_filename: path of the moz.build file, from the gecko root
    filename: the filename as written in that moz.build file

    A filename that begins with '/' is already relative to the gecko root and is returned
    unchanged; anything else is joined onto the moz.build file's directory. The result always
    uses forward slashes.
    """
    already_rooted = filename[0] == "/"
    if already_rooted:
        return filename

    mozbuild_dir = os.path.dirname(normalized_mozbuild_filename)
    mozbuild_dir = mozbuild_dir.replace(os.path.sep, "/")
    joined = os.path.join(mozbuild_dir, filename)
    return joined.replace(os.path.sep, "/")
+
+
def get_mozbuild_file_search_order(
    normalized_filename,
    moz_yaml_dir=None,
    vendoring_dir=None,
    all_mozbuild_filenames_normalized=None,
):
    """
    Returns an ordered list of normalized moz.build filenames to consider for a given filename

    normalized_filename: a source filename normalized to the gecko root

    moz_yaml_dir: the path from gecko_root to the moz.yaml file (which is the root of the
                  moz.build files)

    vendoring_dir: the path to where the library's source files are

    all_mozbuild_filenames_normalized: (optional) the list of all third-party moz.build files
    If all_mozbuild_filenames_normalized is not specified, we look in the filesystem.

    moz_yaml_dir and vendoring_dir must either both be given or both be None; otherwise an
    Exception is raised.

    The list is built out of two distinct steps.

    In Step 1 we will walk up a directory tree, looking for moz.build files. We append moz.build
    files in this order, preferring the lowest moz.build we find, then moving on to one in a
    higher directory.
    The directory we start in is a little complicated. We take the series of subdirectories
    between vendoring_dir and the file in question, and then append them to the moz.yaml
    directory.

    Example:

    .. code-block:: python

        When moz_yaml directory != vendoring_directory:
            moz_yaml_dir = foo/bar/
            vendoring_dir = third_party/baz/
            normalized_filename = third_party/baz/asm/arm/a.S
            starting_directory: foo/bar/asm/arm/
        When moz_yaml directory == vendoring_directory
            (In this case, these variables will actually be 'None' but the algorithm is the same)
            moz_yaml_dir = foo/bar/
            vendoring_dir = foo/bar/
            normalized_filename = foo/bar/asm/arm/a.S
            starting_directory: foo/bar/asm/arm/

    In Step 2 we get a bit desperate. When the vendoring directory and the moz_yaml directory are
    not the same, there is no guarantee that the moz_yaml directory will adhere to the same
    directory structure as the vendoring directory. And indeed it doesn't in some cases
    (e.g. libdav1d.)
    So in this situation we start at the root of the moz_yaml directory and walk downwards, adding
    _any_ moz.build file we encounter to the list. Later on (in all cases, not just
    moz_yaml_dir != vendoring_dir) we only consider a moz.build file if it has source files whose
    directory matches the normalized_filename, so this step, though desperate, is safe-ish and
    believe it or not has worked for some file additions.
    """
    ordered_list = []

    # Without an explicit list we consult the filesystem using relative paths, so make sure
    # the current directory looks like the gecko root.
    if all_mozbuild_filenames_normalized is None:
        assert os.path.isfile(
            ".arcconfig"
        ), "We do not seem to be running from the gecko root"

    # The first time around, this variable name is incorrect.
    # It's actually the full path+filename, not a directory.
    test_directory = None
    if (moz_yaml_dir, vendoring_dir) == (None, None):
        # In this situation, the library is vendored into the same directory as
        # the moz.build files. We can start traversing directories up from the file to
        # add to find the correct moz.build file
        test_directory = normalized_filename
    elif moz_yaml_dir and vendoring_dir:
        # In this situation, the library is vendored in a different place (typically
        # third_party/foo) from the moz.build files.
        # NOTE(review): str.replace removes every occurrence of vendoring_dir, not only a
        # leading one - confirm that is acceptable for all callers.
        subdirectory_path = normalized_filename.replace(vendoring_dir, "")
        test_directory = os.path.join(moz_yaml_dir, subdirectory_path)
    else:
        raise Exception("If moz_yaml_dir or vendoring_dir are specified, both must be")

    # Step 1
    # Each pass moves test_directory one level up; the loop ends once the parent directory
    # string is at most one character long.
    while (
        len(os.path.dirname(test_directory).replace(os.path.sep, "/")) > 1
    ):  # While we are not at '/'
        containing_directory = os.path.dirname(test_directory)

        possible_normalized_mozbuild_filename = os.path.join(
            containing_directory, "moz.build"
        )

        # A missing *or empty* list of known moz.build files means "ask the filesystem".
        if not all_mozbuild_filenames_normalized:
            if os.path.isfile(possible_normalized_mozbuild_filename):
                ordered_list.append(possible_normalized_mozbuild_filename)
        elif possible_normalized_mozbuild_filename in all_mozbuild_filenames_normalized:
            ordered_list.append(possible_normalized_mozbuild_filename)

        test_directory = containing_directory

    # Step 2
    # This always walks the real filesystem, even when all_mozbuild_filenames_normalized was
    # supplied, and may append files that Step 1 has already added.
    if moz_yaml_dir:
        for root, dirs, files in os.walk(moz_yaml_dir):
            for f in files:
                if f == "moz.build":
                    ordered_list.append(os.path.join(root, f))

    return ordered_list
+
+
def get_closest_mozbuild_file(
    normalized_filename,
    moz_yaml_dir=None,
    vendoring_dir=None,
    all_mozbuild_filenames_normalized=None,
):
    """
    Return the first moz.build file in the search order for a normalized filename.

    The arguments are passed straight through to get_mozbuild_file_search_order().
    Returns None when the search order is empty.
    """
    search_order = get_mozbuild_file_search_order(
        normalized_filename,
        moz_yaml_dir,
        vendoring_dir,
        all_mozbuild_filenames_normalized,
    )
    if not search_order:
        return None
    return search_order[0]
+
+
def filenames_directory_is_in_filename_list(
    filename_normalized, list_of_normalized_filenames
):
    """
    Report whether any filename in the list lives in exactly the same directory as
    `filename_normalized`.

    Both sides are reduced to their containing directory (with forward slashes) before they
    are compared; parent and child directories do not count as a match.

    ex:
        f = filenames_directory_is_in_filename_list
        f("foo/bar/a.c", ["foo/b.c"]) -> false
        f("foo/bar/a.c", ["foo/b.c", "foo/bar/c.c"]) -> true
        f("foo/bar/a.c", ["foo/b.c", "foo/bar/baz/d.c"]) -> false
    """
    known_directories = {
        os.path.dirname(name).replace(os.path.sep, "/")
        for name in list_of_normalized_filenames
    }
    wanted_directory = os.path.dirname(filename_normalized).replace(os.path.sep, "/")
    return wanted_directory in known_directories
+
+
def find_all_posible_assignments_from_filename(source_assignments, filename_normalized):
    """
    Narrow a dictionary of source assignments down to the plausible homes for a file.

    source_assignments: dictionary of source-assignment-location -> normalized filename list
    filename_normalized: the normalized filename we are trying to place

    An assignment is kept when its list is non-empty and at least one of its files sits in
    the same directory as `filename_normalized`. The surviving entries are returned in a new
    dictionary, in their original order.
    """
    return {
        location: filenames
        for location, filenames in source_assignments.items()
        if filenames
        and filenames_directory_is_in_filename_list(filename_normalized, filenames)
    }
+
+
def guess_best_assignment(source_assignments, filename_normalized):
    """
    Given several assignments, all of which contain the same directory as the filename, pick one
    we think is best and return its source-assignment-location.

    We do this by looking at the filename itself (not just its directory) and picking the
    assignment which contains a filename with the longest matching prefix. When two assignments
    tie on prefix length, the one containing more files wins.

    e.g: "foo/asm_neon.c" compared to ["foo/main.c", "foo/all_utility.c"], ["foo/asm_arm.c"]
            -> ["foo/asm_arm.c"] (match of `foo/asm_`)

    source_assignments: dictionary of source-assignment-location -> normalized filename list
    filename_normalized: the normalized filename we are trying to place

    Returns a tuple (source-assignment-location, (number_of_refinements, length_logic_count)).
    The location is None if no file shared any prefix with the filename.
    """
    length_of_longest_match = 0
    source_assignment_location_of_longest_match = None
    statistic_number_refinements = 0
    statistic_length_logic = 0

    for key, list_of_normalized_filenames in source_assignments.items():
        for f in list_of_normalized_filenames:
            if filename_normalized == f:
                # Do not cheat by matching the prefix of the exact file
                continue

            prefix_length = len(os.path.commonprefix([filename_normalized, f]))
            if prefix_length > length_of_longest_match:
                statistic_number_refinements += 1
                length_of_longest_match = prefix_length
                source_assignment_location_of_longest_match = key
            elif (
                # Until a first candidate exists there is nothing to tie with. Without this
                # guard a file sharing no prefix at all (length 0 == 0) made us look up
                # source_assignments[None] and die with a KeyError.
                source_assignment_location_of_longest_match is not None
                and prefix_length == length_of_longest_match
                and len(list_of_normalized_filenames)
                > len(source_assignments[source_assignment_location_of_longest_match])
            ):
                statistic_number_refinements += 1
                statistic_length_logic += 1
                source_assignment_location_of_longest_match = key
    return (
        source_assignment_location_of_longest_match,
        (statistic_number_refinements, statistic_length_logic),
    )
+
+
def edit_moz_build_file_to_add_file(
    normalized_mozbuild_filename,
    unnormalized_filename_to_add,
    unnormalized_list_of_files,
):
    """
    Add a filename to a source list inside a moz.build file, editing the file in-place.

    normalized_mozbuild_filename: the moz.build file to edit
    unnormalized_filename_to_add: the filename to insert, as it should appear in the file
        (backslashes are converted to forward slashes)
    unnormalized_list_of_files: the filenames currently in the target list, as they appear in
        the file. NOTE: the new filename is appended to this list, so the caller's list is
        modified. It must contain at least one file, otherwise there is no neighbour to
        insert next to and an IndexError is raised.

    The edit is textual: we find the first line mentioning the file that sorts next to the new
    one, and rewrite that line. The result is written to '<filename>.new', which then replaces
    the original. Trailing whitespace is stripped from every line as a side effect.

    Ideally this would instead add a node to the AST, dump the AST out, and run black on the
    file, but there are some issues:
      - third party moz.build files (or maybe all moz.build files) aren't always run through
        black
      - dumping the ast out loses comments
    """

    # Make sure that we only write in forward slashes
    unnormalized_filename_to_add = unnormalized_filename_to_add.replace("\\", "/")

    # add the file into the list, and then sort it in the same way the moz.build validator
    # expects
    unnormalized_list_of_files.append(unnormalized_filename_to_add)
    unnormalized_list_of_files = alphabetical_sorted(unnormalized_list_of_files)

    # we're going to add our file by doing a find/replace of an adjacent file in the list:
    # the one after it if the new file sorts first, otherwise the one before it.
    indx_of_addition = unnormalized_list_of_files.index(unnormalized_filename_to_add)
    if indx_of_addition == 0:
        target_indx = 1
        replace_before = False
    else:
        target_indx = indx_of_addition - 1
        replace_before = True

    find_str = unnormalized_list_of_files[target_indx]

    # We will only perform the first replacement. This is because sometimes there's moz.build
    # code like:
    #   SOURCES += ['file.cpp']
    #   SOURCES['file.cpp'].flags += ['-Winline']
    # If we replaced every time we found the target, we would be inserting into that second
    # line.
    did_replace = False

    with open(normalized_mozbuild_filename, mode="r") as file:
        with open(normalized_mozbuild_filename + ".new", mode="wb") as output:
            for line in file:
                if not did_replace and find_str in line:
                    did_replace = True

                    # Okay, we found the line we need to edit, now we need to be ugly about it
                    # Grab the type of quote used in this moz.build file: single or double
                    quote_type = line[line.index(find_str) - 1]

                    if "[" not in line:
                        # We'll want to put our new file onto its own line
                        newline_to_add = "\n"
                        # And copy the indentation of the line we're adding adjacent to
                        indent_value = line[0 : line.index(quote_type)]
                    else:
                        # This is frustrating, we have the start of the array here. We aren't
                        # going to be able to indent things onto a newline properly. We're just
                        # going to have to stick it in on the same line.
                        newline_to_add = ""
                        indent_value = ""

                    # From here on we match the neighbour including its quotes.
                    find_str = "%s%s%s" % (quote_type, find_str, quote_type)
                    if replace_before:
                        # <neighbour>,<newline><indent><new file>
                        replacement_tuple = (
                            find_str,
                            newline_to_add,
                            indent_value,
                            quote_type,
                            unnormalized_filename_to_add,
                            quote_type,
                        )
                        replace_str = "%s,%s%s%s%s%s" % replacement_tuple
                    else:
                        # <new file>,<newline><indent><neighbour>
                        replacement_tuple = (
                            quote_type,
                            unnormalized_filename_to_add,
                            quote_type,
                            newline_to_add,
                            indent_value,
                            find_str,
                        )
                        replace_str = "%s%s%s,%s%s%s" % replacement_tuple

                    line = line.replace(find_str, replace_str)

                output.write((line.rstrip() + "\n").encode("utf-8"))

    shutil.move(normalized_mozbuild_filename + ".new", normalized_mozbuild_filename)
+
+
def edit_moz_build_file_to_remove_file(
    normalized_mozbuild_filename, unnormalized_filename_to_remove
):
    """
    Remove a filename from a source list inside a moz.build file, editing the file in-place.

    normalized_mozbuild_filename: the moz.build file to edit
    unnormalized_filename_to_remove: the filename to remove, exactly as it appears in the file

    Only the first line mentioning the filename is touched. If that line holds nothing but the
    quoted filename (and an optional comma) the whole line is dropped. If the line also holds
    a '[' or ']', just the quoted filename and any commas directly after it are cut out of the
    line. Any other kind of line raises an Exception.

    The result is written to '<filename>.new', which then replaces the original. Trailing
    whitespace is stripped from every line as a side effect. If the filename is never found,
    the file is rewritten without any removal and no error is raised.
    """

    # The filename is literal text, not a pattern: escape it so that characters such as
    # '.', '+' or '(' in a filename cannot change (or break) the regular expressions.
    escaped_filename = re.escape(unnormalized_filename_to_remove)
    simple_file_line = re.compile(r"^\s*['\"]" + escaped_filename + r"['\"],*$")
    did_replace = False

    with open(normalized_mozbuild_filename, mode="r") as file:
        with open(normalized_mozbuild_filename + ".new", mode="wb") as output:
            for line in file:
                if not did_replace and unnormalized_filename_to_remove in line:
                    did_replace = True

                    # If the line consists of just a single source file on it, then we're in the
                    # clear - we can just skip this line.
                    if simple_file_line.match(line):
                        # Do not output anything, just keep going.
                        continue

                    # Okay, so the line is a little more complicated.
                    # The character just before the filename tells us which quote is in use.
                    quote_type = line[line.index(unnormalized_filename_to_remove) - 1]

                    if "[" in line or "]" in line:
                        find_str = "%s%s%s,*" % (
                            re.escape(quote_type),
                            escaped_filename,
                            re.escape(quote_type),
                        )
                        line = re.sub(find_str, "", line)
                    else:
                        raise Exception(
                            "Got an unusual type of line we're trying to remove a file from:",
                            line,
                        )

                output.write((line.rstrip() + "\n").encode("utf-8"))

    shutil.move(normalized_mozbuild_filename + ".new", normalized_mozbuild_filename)
+
+
def validate_directory_parameters(moz_yaml_dir, vendoring_dir):
    """
    Check the moz_yaml_dir / vendoring_dir pair and give both a trailing slash.

    The two directories must either both be None or both be specified; anything else trips
    an assertion. Returns the (moz_yaml_dir, vendoring_dir) tuple, each ending in '/' when
    both were given.
    """

    def with_trailing_slash(directory):
        return directory if directory[-1] == "/" else directory + "/"

    # Validate the parameters
    assert (moz_yaml_dir, vendoring_dir) == (None, None) or (
        moz_yaml_dir and vendoring_dir
    ), "If either moz_yaml_dir or vendoring_dir are specified, they both must be"

    if moz_yaml_dir is None or vendoring_dir is None:
        return (moz_yaml_dir, vendoring_dir)

    # Ensure they are provided with trailing slashes
    return (with_trailing_slash(moz_yaml_dir), with_trailing_slash(vendoring_dir))
+
+
# The ways a moz.build file can refer to the files it lists.
HAS_ABSOLUTE = 1  # path starts with '/', i.e. it is relative to the gecko root
HAS_TRAVERSE_CHILD = 2  # path starts with '../'
HAS_RELATIVE_CHILD = 2  # behaves the same as above


def get_file_reference_modes(source_assignments):
    """
    Given a set of source assignments, this function traverses through the
    files referenced in those assignments to see if the files are referenced
    using absolute paths (relative to gecko root) or relative paths.

    source_assignments: a mapping whose values are lists of filenames; empty (or
        None) lists are ignored

    It will return the set of all the modes (HAS_* constants) that are seen.
    """
    modes = set()

    for list_of_normalized_filenames in source_assignments.values():
        if not list_of_normalized_filenames:
            continue
        for filename in list_of_normalized_filenames:
            # startswith() rather than slicing: a two character slice can never
            # equal the three character '../', and indexing an empty string raises.
            if filename.startswith("/"):
                modes.add(HAS_ABSOLUTE)
            elif filename.startswith("../"):
                modes.add(HAS_TRAVERSE_CHILD)
            else:
                modes.add(HAS_RELATIVE_CHILD)
    return modes
+
+
def renormalize_filename(
    mode,
    moz_yaml_dir,
    vendoring_dir,
    normalized_mozbuild_filename,
    normalized_filename_to_act_on,
):
    """
    Return normalized_filename_to_act_on rewritten to suit the moz.build file:
     - In HAS_ABSOLUTE mode, a '/' is prepended, making it an absolute path from
       the gecko root.
     - Otherwise, if both moz_yaml_dir and vendoring_dir are set, the result is the
       moz.build file's directory joined with the relative path from that directory
       to the file.
     - Otherwise the filename is returned untouched.
    """
    if mode == HAS_ABSOLUTE:
        # The moz.build file uses absolute paths from the gecko root; a leading
        # '/' is all that is needed to indicate that.
        return "/" + normalized_filename_to_act_on

    if not (moz_yaml_dir and vendoring_dir):
        return normalized_filename_to_act_on

    def with_forward_slashes(path):
        return path.replace(os.path.sep, "/")

    # (a) the path from gecko_root to the directory of the moz.build file
    mozbuild_directory = with_forward_slashes(
        os.path.dirname(normalized_mozbuild_filename)
    )
    # (b) the relative path from that directory to the file we want
    path_from_mozbuild = with_forward_slashes(
        os.path.relpath(normalized_filename_to_act_on, start=mozbuild_directory)
    )
    # (c) (b) starts at the moz.build file's directory, so prepend (a) to get
    #     back to something normalized to the gecko_root
    return with_forward_slashes(os.path.join(mozbuild_directory, path_from_mozbuild))
+
+
+#########################################################
+# PUBLIC API
+#########################################################
+
+
class MozBuildRewriteException(Exception):
    """
    Raised by the public API when no moz.build file could be found to add a file
    to, or to remove a file from.
    """
+
+
def remove_file_from_moz_build_file(
    normalized_filename_to_remove, moz_yaml_dir=None, vendoring_dir=None
):
    """
    Given a filename, relative to the gecko root (aka normalized), we look for the nearest
    moz.build file, look in that file for the file, and then edit that moz.build file in-place.

    It accepts two optional parameters. If one is specified they both must be. If a library is
    vendored in a separate place from the moz.yaml file, these parameters specify those two
    directories.

    Raises MozBuildRewriteException if no candidate moz.build file lists the file.
    """
    moz_yaml_dir, vendoring_dir = validate_directory_parameters(
        moz_yaml_dir, vendoring_dir
    )

    all_possible_normalized_mozbuild_filenames = get_mozbuild_file_search_order(
        normalized_filename_to_remove, moz_yaml_dir, vendoring_dir, None
    )

    # These are the two header file types specified in vendor_manifest.py > source_suffixes
    if normalized_filename_to_remove.endswith((".h", ".hpp")):
        assignment_type = "header-files"
    else:
        assignment_type = "source-files"

    for normalized_mozbuild_filename in all_possible_normalized_mozbuild_filenames:
        source_assignments, _root, _code = mozbuild_file_to_source_assignments(
            normalized_mozbuild_filename, assignment_type
        )

        for mode in get_file_reference_modes(source_assignments):
            # normalized_filename_to_remove is the path from gecko_root to the file.
            # However, if we vendor separate from moz.yaml, 'normalization' depends on
            # the moz.build file being tested. Always start from the untouched
            # parameter so that one (moz.build, mode) attempt can never feed an
            # already re-normalized path into the next one.
            candidate_filename = renormalize_filename(
                mode,
                moz_yaml_dir,
                vendoring_dir,
                normalized_mozbuild_filename,
                normalized_filename_to_remove,
            )

            if any(
                candidate_filename in normalized_source_filename_list
                for normalized_source_filename_list in source_assignments.values()
            ):
                unnormalized_filename_to_remove = unnormalize_filename(
                    normalized_mozbuild_filename, candidate_filename
                )
                edit_moz_build_file_to_remove_file(
                    normalized_mozbuild_filename, unnormalized_filename_to_remove
                )
                return

    raise MozBuildRewriteException("Could not remove " + normalized_filename_to_remove)
+
+
def add_file_to_moz_build_file(
    normalized_filename_to_add, moz_yaml_dir=None, vendoring_dir=None
):
    """
    The overall entry point for adding a file. Given a filename, relative to the gecko root
    (aka normalized), find a moz.build file to add it to, find the place inside that moz.build
    file where it belongs, and then edit that moz.build file in-place.

    Two optional parameters are accepted; if one is specified, both must be. They name the
    moz.yaml directory and the vendoring directory, for libraries that are vendored in a
    separate place from their moz.yaml file.

    Raises MozBuildRewriteException if no moz.build file offers a place for the file.
    """
    moz_yaml_dir, vendoring_dir = validate_directory_parameters(
        moz_yaml_dir, vendoring_dir
    )

    mozbuild_search_order = get_mozbuild_file_search_order(
        normalized_filename_to_add, moz_yaml_dir, vendoring_dir, None
    )

    is_header = normalized_filename_to_add.endswith(
        ".h"
    ) or normalized_filename_to_add.endswith(".hpp")
    assignment_type = "header-files" if is_header else "source-files"

    for normalized_mozbuild_filename in mozbuild_search_order:
        source_assignments, root, code = mozbuild_file_to_source_assignments(
            normalized_mozbuild_filename, assignment_type
        )

        for mode in get_file_reference_modes(source_assignments):
            # normalized_filename_to_add is the path from gecko_root to the file, but when
            # we vendor separate from moz.yaml the filename has to be re-normalized for
            # each moz.build file we test - always starting from the original value.
            renormalized_filename = renormalize_filename(
                mode,
                moz_yaml_dir,
                vendoring_dir,
                normalized_mozbuild_filename,
                normalized_filename_to_add,
            )

            possible_assignments = find_all_posible_assignments_from_filename(
                source_assignments, renormalized_filename
            )

            if len(possible_assignments) == 0:
                # Nothing suitable in this mode; try the next one
                continue

            assert (
                len(possible_assignments) > 0
            ), "Could not find a single possible source assignment"

            if len(possible_assignments) > 1:
                chosen_source_assignment_location, _ = guess_best_assignment(
                    possible_assignments, renormalized_filename
                )
            else:
                chosen_source_assignment_location = list(possible_assignments.keys())[0]

            # moz.build files rarely use full paths, so convert both the new file and
            # the files already in the chosen list back to how the moz.build spells them.
            unnormalized_list_of_files = [
                unnormalize_filename(normalized_mozbuild_filename, existing_filename)
                for existing_filename in possible_assignments[
                    chosen_source_assignment_location
                ]
            ]
            unnormalized_filename_to_add = unnormalize_filename(
                normalized_mozbuild_filename, renormalized_filename
            )

            edit_moz_build_file_to_add_file(
                normalized_mozbuild_filename,
                unnormalized_filename_to_add,
                unnormalized_list_of_files,
            )
            return

    raise MozBuildRewriteException(
        "Could not find a single moz.build file to add " + normalized_filename_to_add
    )
+
+
+#########################################################
+# TESTING CODE
+#########################################################
+
+
def get_all_target_filenames_normalized(all_mozbuild_filenames_normalized):
    """
    Collect, into one flat list, every file listed in every source assignment of
    the given moz.build files.

    This function is only used for debug/testing purposes - there is no reason to call
    this as part of 'the algorithm'
    """
    collected_filenames = []
    for mozbuild_filename in all_mozbuild_filenames_normalized:
        source_assignments, _root, _code = mozbuild_file_to_source_assignments(
            mozbuild_filename
        )
        for filenames_in_assignment in source_assignments.values():
            collected_filenames.extend(filenames_in_assignment)

    return collected_filenames
+
+
def try_to_match_target_file(
    all_mozbuild_filenames_normalized, target_filename_normalized
):
    """
    Runs 'the algorithm' on a target file, and reports whether it picked the source
    assignment that really contains the file.

    all_mozbuild_filenames_normalized: the list of all third-party moz.build files
    target_filename_normalized: the target filename, normalized to the gecko root

    Returns (True, None) on a correct match, (False, "No moz.build file found") when
    there is no moz.build file to look in, and otherwise (False, <the wrongly chosen
    source assignment location>). The global statistics are only kept for matches
    that succeed.
    """
    global statistics

    # Failed matches must not show up in the statistics, so keep a copy to roll back to
    statistics_before_attempt = copy.deepcopy(statistics)

    if target_filename_normalized == "":
        raise Exception("Received an empty target_filename_normalized")

    closest_mozbuild_filename = get_closest_mozbuild_file(
        target_filename_normalized, None, None, all_mozbuild_filenames_normalized
    )
    if not closest_mozbuild_filename:
        return (False, "No moz.build file found")

    source_assignments, _root, _code = mozbuild_file_to_source_assignments(
        closest_mozbuild_filename
    )
    candidates = find_all_posible_assignments_from_filename(
        source_assignments, target_filename_normalized
    )

    candidate_count = len(candidates)
    if candidate_count == 0:
        raise Exception("No possible assignments were found")

    if candidate_count == 1:
        chosen_location = list(candidates.keys())[0]
    else:
        chosen_location, (refinement_count, length_logic) = guess_best_assignment(
            candidates, target_filename_normalized
        )

        statistics["needed_to_guess"] += 1

        # Each of these statistics is a histogram: value seen -> number of times seen
        for histogram_name, observed_value in (
            ("guess_candidates", candidate_count),
            ("number_refinements", refinement_count),
            ("length_logic", length_logic),
        ):
            if observed_value not in statistics[histogram_name]:
                statistics[histogram_name][observed_value] = 0
            statistics[histogram_name][observed_value] += 1

    if target_filename_normalized in candidates[chosen_location]:
        return (True, None)

    # The guess was wrong: roll the statistics back to what they were before
    statistics = statistics_before_attempt
    return (False, chosen_location)
+
+
def get_gecko_root():
    """
    Using __file__ as a base, find the gecko root

    Starting in the directory containing this file, walk up the directory tree and
    return the first directory that contains a '.arcconfig' file. If the top of the
    filesystem is reached without finding one, print a message and exit with status 1.
    """
    directory_to_check = os.path.dirname(os.path.abspath(__file__))
    while not os.path.isfile(os.path.join(directory_to_check, ".arcconfig")):
        parent_directory = os.path.dirname(directory_to_check)
        # At the top of the filesystem, dirname() returns its argument unchanged.
        # Comparing against the parent (rather than against a literal "/") also
        # terminates on platforms where the root is not spelled "/".
        if parent_directory == directory_to_check:
            print("Could not find gecko root")
            sys.exit(1)
        directory_to_check = parent_directory

    return directory_to_check
+
+
def get_all_mozbuild_filenames(gecko_root):
    """
    Find all the third party moz.build files in the gecko repo

    gecko_root: path of the gecko root; the third party locations are read from
        tools/rewriting/ThirdPartyPaths.txt underneath it

    Returns a list of moz.build paths, relative to gecko_root and using '/' as the
    separator. Entries of ThirdPartyPaths.txt that are blank or that match nothing
    on disk contribute no results.
    """
    # Imported here because this helper is the only user of the module.
    import glob

    third_party_paths_filename = os.path.join(
        gecko_root, "tools", "rewriting", "ThirdPartyPaths.txt"
    )
    with open(third_party_paths_filename) as third_party_paths_file:
        third_party_paths = [line.strip() for line in third_party_paths_file]

    all_mozbuild_filenames_normalized = []
    for path in third_party_paths:
        if not path:
            # A blank entry would resolve to the gecko root itself and pull in
            # every moz.build file in the tree.
            continue

        # Some paths are specified as globs. Only the entry is a pattern; the
        # gecko root is escaped so it is always taken literally.
        pattern = os.path.join(glob.escape(gecko_root), path)
        for match in sorted(glob.glob(pattern)):
            found = []
            if os.path.isdir(match):
                for dirpath, dirnames, filenames in os.walk(match):
                    dirnames.sort()  # deterministic traversal order
                    if "moz.build" in filenames:
                        found.append(os.path.join(dirpath, "moz.build"))
            elif os.path.basename(match) == "moz.build":
                found.append(match)

            for mozbuild_filename in found:
                all_mozbuild_filenames_normalized.append(
                    os.path.relpath(mozbuild_filename, gecko_root).replace(
                        os.path.sep, "/"
                    )
                )

    return all_mozbuild_filenames_normalized
+
+
def test_all_third_party_files(gecko_root, all_mozbuild_filenames_normalized):
    """
    Run the algorithm on every source file listed in a third party moz.build file, then
    print how many were matched, which ones failed, and the collected statistics.

    moz.build files with "webrtc" in their path are left out.
    """
    mozbuild_filenames = [
        filename
        for filename in all_mozbuild_filenames_normalized
        if "webrtc" not in filename
    ]
    target_filenames = get_all_target_filenames_normalized(mozbuild_filenames)

    failures = []  # (target filename, wrongly guessed location) pairs
    matched_count = 0
    attempted_count = 0

    print("Going to try to match %i files..." % len(target_filenames))
    for attempted_count, target_filename in enumerate(target_filenames, start=1):
        matched, wrong_guess = try_to_match_target_file(
            mozbuild_filenames, target_filename
        )

        if matched:
            matched_count += 1
        else:
            failures.append((target_filename, wrong_guess))

        if attempted_count % 100 == 0:
            print("Progress:", attempted_count)

    print(
        "Successfully Matched %i of %i files" % (matched_count, attempted_count)
    )
    if failures:
        print("Failed files:")
        for failed_filename, wrong_guess in failures:
            print("\t", failed_filename, wrong_guess)
    print("Statistics:")
    pprint(statistics)
+
+
if __name__ == "__main__":
    # Filenames handed to the public API are relative to the gecko root, so run
    # from there.
    gecko_root = get_gecko_root()
    os.chdir(gecko_root)

    # Manual run: add one jpeg-xl header, for a library whose moz.yaml directory
    # (media/libjxl) is separate from its vendoring directory (third_party/jpeg-xl).
    add_file_to_moz_build_file(
        normalized_filename_to_add=(
            "third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h"
        ),
        moz_yaml_dir="media/libjxl",
        vendoring_dir="third_party/jpeg-xl",
    )

    # Alternative: run the algorithm over every third party moz.build file.
    # all_mozbuild_filenames_normalized = get_all_mozbuild_filenames(gecko_root)
    # test_all_third_party_files(gecko_root, all_mozbuild_filenames_normalized)