# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Helpers for batching sequences, running commands in parallel with
subprocess.Popen, collecting source files from a directory tree and
scanning tool output for reported file names.
"""

import multiprocessing as mp
import os
from fnmatch import fnmatch
from subprocess import Popen


def chunk(seq, n):
    """
    Divide a sequence into equal sized chunks
    (the last chunk may be smaller, but won't be empty).

    seq may be any iterable; n is the maximum number of elements per
    chunk. Returns a list of lists.

    Raises ValueError if n is smaller than 1, since no chunking
    satisfying the contract above exists in that case.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    chunks = []
    some = []
    for element in seq:
        if len(some) == n:
            # the current chunk is full: store it and start a new one
            chunks.append(some)
            some = []
        some.append(element)
    if some:
        chunks.append(some)
    return chunks


def dechunk(chunks):
    "Flatten chunks into a single list."
    return [element for some in chunks for element in some]


def run_parallel(cmds, **kwargs):
    """
    Run each of cmds (with shared **kwargs) using subprocess.Popen
    then wait for all of them to complete.

    Runs batches of multiprocessing.cpu_count() * 2 from cmds: every
    command of a batch is started before any of them is waited on, and
    the next batch only starts once the whole batch has completed.

    Returns a list of tuples containing each process'
    returncode, stdout, stderr (in the order of cmds). stdout and
    stderr are whatever Popen.communicate() returns, so they are None
    unless the matching stream was redirected through **kwargs.

    If a command cannot be started, the commands of the same batch that
    were already started are killed and the exception is re-raised.
    """
    complete = []
    for cmds_batch in chunk(cmds, mp.cpu_count() * 2):
        procs_batch = []
        try:
            for cmd in cmds_batch:
                procs_batch.append(Popen(cmd, **kwargs))
        except BaseException:
            # Do not leave already started children running (and their
            # pipes open) when a later command of the batch fails to start.
            for proc in procs_batch:
                proc.kill()
                proc.communicate()
            raise
        for proc in procs_batch:
            stdout, stderr = proc.communicate()
            complete.append((proc.returncode, stdout, stderr))
    return complete


# file extensions which identify a file as a source file for get_sources
_source_extensions = '''
.h
.cc
.cpp
'''.split()


def get_sources(source_dir, exclude_globs=None):
    """
    Walk source_dir recursively and return the absolute paths of all
    files whose extension is listed in _source_extensions.

    exclude_globs is an optional iterable of fnmatch patterns; a file
    whose absolute path matches any of them is left out of the result.
    """
    if exclude_globs is None:
        exclude_globs = ()
    sources = []
    for directory, _subdirs, basenames in os.walk(source_dir):
        for basename in basenames:
            path = os.path.join(directory, basename)
            # filter out non-source files
            if os.path.splitext(path)[1] not in _source_extensions:
                continue
            path = os.path.abspath(path)
            # filter out files that match one of the exclusion globs
            if any(fnmatch(path, glob) for glob in exclude_globs):
                continue
            sources.append(path)
    return sources


def stdout_pathcolonline(completed_process, filenames):
    """
    Given a completed process which may have reported some files as
    problematic by printing the path name followed by ':' then a line
    number, examine stdout and find the actually reported file names.

    completed_process is a (returncode, stdout, stderr) tuple as
    produced by run_parallel; only stdout is examined. It may be bytes
    or str. filenames is an iterable of str.

    Returns a tuple (problem_files, stdout) where problem_files is the
    set of the given file names (exactly as passed in, without the ':'
    separator) for which at least one line of stdout starts with the
    name followed by ':', and stdout is returned unchanged.
    """
    _returncode, stdout, _stderr = completed_process
    problem_files = set()
    if not stdout:
        # nothing was captured, so nothing can have been reported
        return problem_files, stdout

    # map the prefix to look for in the output to the file name it stands
    # for, using the same string type as the captured output
    if isinstance(stdout, bytes):
        pending = {name.encode('utf-8') + b':': name for name in filenames}
    else:
        pending = {name + ':': name for name in filenames}

    for line in stdout.splitlines():
        if not pending:
            break
        for prefix in pending:
            if line.startswith(prefix):
                # each file only needs to be found once; leaving the loop
                # right after the removal keeps the iteration valid
                problem_files.add(pending.pop(prefix))
                break
    return problem_files, stdout