Diffstat (limited to 'src/arrow/cpp/build-support/lintutils.py')
-rw-r--r-- | src/arrow/cpp/build-support/lintutils.py | 109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/arrow/cpp/build-support/lintutils.py b/src/arrow/cpp/build-support/lintutils.py
new file mode 100644
index 000000000..2386eb2e6
--- /dev/null
+++ b/src/arrow/cpp/build-support/lintutils.py
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import multiprocessing as mp
+import os
+from fnmatch import fnmatch
+from subprocess import Popen
+
+
+def chunk(seq, n):
+    """
+    divide a sequence into equal sized chunks
+    (the last chunk may be smaller, but won't be empty)
+    """
+    chunks = []
+    some = []
+    for element in seq:
+        if len(some) == n:
+            chunks.append(some)
+            some = []
+        some.append(element)
+    if len(some) > 0:
+        chunks.append(some)
+    return chunks
+
+
+def dechunk(chunks):
+    "flatten chunks into a single list"
+    seq = []
+    for chunk in chunks:
+        seq.extend(chunk)
+    return seq
+
+
+def run_parallel(cmds, **kwargs):
+    """
+    Run each of cmds (with shared **kwargs) using subprocess.Popen
+    then wait for all of them to complete.
+    Runs batches of multiprocessing.cpu_count() * 2 from cmds
+    returns a list of tuples containing each process'
+    returncode, stdout, stderr
+    """
+    complete = []
+    for cmds_batch in chunk(cmds, mp.cpu_count() * 2):
+        procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch]
+        for proc in procs_batch:
+            stdout, stderr = proc.communicate()
+            complete.append((proc.returncode, stdout, stderr))
+    return complete
+
+
+_source_extensions = '''
+.h
+.cc
+.cpp
+'''.split()
+
+
+def get_sources(source_dir, exclude_globs=[]):
+    sources = []
+    for directory, subdirs, basenames in os.walk(source_dir):
+        for path in [os.path.join(directory, basename)
+                     for basename in basenames]:
+            # filter out non-source files
+            if os.path.splitext(path)[1] not in _source_extensions:
+                continue
+
+            path = os.path.abspath(path)
+
+            # filter out files that match the globs in the globs file
+            if any([fnmatch(path, glob) for glob in exclude_globs]):
+                continue
+
+            sources.append(path)
+    return sources
+
+
+def stdout_pathcolonline(completed_process, filenames):
+    """
+    given a completed process which may have reported some files as problematic
+    by printing the path name followed by ':' then a line number, examine
+    stdout and return the set of actually reported file names
+    """
+    returncode, stdout, stderr = completed_process
+    bfilenames = set()
+    for filename in filenames:
+        bfilenames.add(filename.encode('utf-8') + b':')
+    problem_files = set()
+    for line in stdout.splitlines():
+        for filename in bfilenames:
+            if line.startswith(filename):
+                problem_files.add(filename.decode('utf-8'))
+                bfilenames.remove(filename)
+                break
+    return problem_files, stdout
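
For context, a minimal sketch of how these helpers could be wired together by a lint driver script; the lint command ("cpplint"), the source directory, and the chunk size below are illustrative assumptions, not part of this patch:

# Hypothetical driver using lintutils; "cpplint", "cpp/src", and the
# chunk size of 16 are assumptions for illustration only.
from subprocess import PIPE

import lintutils

# Collect .h/.cc/.cpp files, skipping anything matching the exclude globs.
sources = lintutils.get_sources("cpp/src", exclude_globs=["*generated*"])

# Build one lint invocation per chunk of files; run_parallel then runs
# them in batches sized by multiprocessing.cpu_count() * 2.
cmds = [["cpplint", "--quiet"] + some
        for some in lintutils.chunk(sources, 16)]
results = lintutils.run_parallel(cmds, stdout=PIPE, stderr=PIPE)

for result in results:
    # Keep only the files the linter actually reported as "path:line: ...".
    problem_files, stdout = lintutils.stdout_pathcolonline(result, sources)
    for path in sorted(problem_files):
        print("lint issue in", path)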