summary | refs | log | tree | commit | diff | stats
path: root/src/arrow/cpp/build-support/lintutils.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/cpp/build-support/lintutils.py')
-rw-r--r-- src/arrow/cpp/build-support/lintutils.py 109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/arrow/cpp/build-support/lintutils.py b/src/arrow/cpp/build-support/lintutils.py
new file mode 100644
index 000000000..2386eb2e6
--- /dev/null
+++ b/src/arrow/cpp/build-support/lintutils.py
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import multiprocessing as mp
+import os
+from fnmatch import fnmatch
+from subprocess import Popen
+
+
def chunk(seq, n):
    """
    divide a sequence into equal sized chunks
    (the last chunk may be smaller, but won't be empty)

    seq may be any iterable; n is the maximum number of elements per
    chunk and must be at least 1.
    Returns a list of lists, preserving the order of seq.
    Raises ValueError if n < 1.
    """
    # n == 0 used to emit an empty leading chunk and a negative n never
    # split at all; both contradict the contract above, so reject them.
    if n < 1:
        raise ValueError("chunk size must be at least 1, got %r" % (n,))
    chunks = []
    current = []
    for element in seq:
        current.append(element)
        if len(current) == n:
            chunks.append(current)
            current = []
    # keep the trailing partial chunk, but never an empty one
    if current:
        chunks.append(current)
    return chunks
+
+
def dechunk(chunks):
    "concatenate the elements of every chunk, in order, into one new list"
    return [element for part in chunks for element in part]
+
+
def run_parallel(cmds, **kwargs):
    """
    Launch every command in cmds through subprocess.Popen, forwarding the
    same **kwargs to each Popen call, and wait for all of them to finish.

    Commands are processed in batches of multiprocessing.cpu_count() * 2:
    a whole batch is started first, then each process of that batch is
    waited on in turn before the next batch is started.

    Returns a list with one (returncode, stdout, stderr) tuple per
    command, in the order of cmds.
    """
    batch_size = mp.cpu_count() * 2
    results = []
    for batch in chunk(cmds, batch_size):
        # start the whole batch before waiting on any member of it
        running = []
        for cmd in batch:
            running.append(Popen(cmd, **kwargs))
        for process in running:
            out, err = process.communicate()
            results.append((process.returncode, out, err))
    return results
+
+
# file extensions (including the leading dot) that get_sources keeps
_source_extensions = ['.h', '.cc', '.cpp']
+
+
def get_sources(source_dir, exclude_globs=None):
    """
    Recursively collect source files below source_dir.

    Walks source_dir with os.walk and keeps every file whose extension
    is listed in _source_extensions. Each kept path is made absolute and
    then dropped if it matches any fnmatch pattern in exclude_globs
    (patterns are matched against the absolute path).

    exclude_globs is an iterable of fnmatch patterns; the default, None,
    excludes nothing.
    Returns a list of absolute paths in the order os.walk yields them.
    """
    # None instead of a mutable [] default, which would be shared
    # across calls
    if exclude_globs is None:
        exclude_globs = ()
    sources = []
    for directory, _subdirs, basenames in os.walk(source_dir):
        for basename in basenames:
            path = os.path.join(directory, basename)

            # filter out non-source files
            if os.path.splitext(path)[1] not in _source_extensions:
                continue

            path = os.path.abspath(path)

            # filter out files that match any of the exclude globs
            if any(fnmatch(path, glob) for glob in exclude_globs):
                continue

            sources.append(path)
    return sources
+
+
def stdout_pathcolonline(completed_process, filenames):
    """
    given a completed process which may have reported some files as problematic
    by printing the path name followed by ':' then a line number, examine
    stdout and return the set of actually reported file names

    completed_process is a (returncode, stdout, stderr) tuple; stdout must
    be bytes. filenames is an iterable of str paths to look for.
    Returns (problem_files, stdout) where problem_files is the set of
    entries of filenames for which some stdout line starts with
    "<filename>:", and stdout is passed through unchanged.
    """
    _returncode, stdout, _stderr = completed_process
    # Map each b"<path>:" match prefix back to the original name, so the
    # reported names do not carry the trailing ':' used only for matching.
    pending = {}
    for filename in filenames:
        pending[filename.encode('utf-8') + b':'] = filename
    problem_files = set()
    for line in stdout.splitlines():
        for prefix, filename in pending.items():
            if line.startswith(prefix):
                problem_files.add(filename)
                # a file only needs to be reported once; stop matching it.
                # Deleting is safe because we leave the loop immediately.
                del pending[prefix]
                break
    return problem_files, stdout