diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/rocksdb/build_tools | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21. (refs: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/build_tools')
23 files changed, 12431 insertions, 0 deletions
diff --git a/src/rocksdb/build_tools/RocksDBCommonHelper.php b/src/rocksdb/build_tools/RocksDBCommonHelper.php new file mode 100644 index 00000000..e7bfb520 --- /dev/null +++ b/src/rocksdb/build_tools/RocksDBCommonHelper.php @@ -0,0 +1,377 @@ +<?php +// Copyright 2004-present Facebook. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Name of the environment variables which need to be set by the entity which +// triggers continuous runs so that code at the end of the file gets executed +// and Sandcastle run starts. +const ENV_POST_RECEIVE_HOOK = "POST_RECEIVE_HOOK"; +const ENV_HTTPS_APP_VALUE = "HTTPS_APP_VALUE"; +const ENV_HTTPS_TOKEN_VALUE = "HTTPS_TOKEN_VALUE"; + +const PRIMARY_TOKEN_FILE = '/home/krad/.sandcastle'; +const CONT_RUN_ALIAS = "leveldb"; + +////////////////////////////////////////////////////////////////////// +/* Run tests in sandcastle */ +function postURL($diffID, $url) { + assert(strlen($diffID) > 0); + assert(is_numeric($diffID)); + assert(strlen($url) > 0); + + $cmd_args = array( + 'diff_id' => (int)$diffID, + 'name' => sprintf( + 'click here for sandcastle tests for D%d', + (int)$diffID + ), + 'link' => $url + ); + $cmd = 'echo ' . escapeshellarg(json_encode($cmd_args)) + . ' | arc call-conduit differential.updateunitresults'; + + shell_exec($cmd); +} + +function buildUpdateTestStatusCmd($diffID, $test, $status) { + assert(strlen($diffID) > 0); + assert(is_numeric($diffID)); + assert(strlen($test) > 0); + assert(strlen($status) > 0); + + $cmd_args = array( + 'diff_id' => (int)$diffID, + 'name' => $test, + 'result' => $status + ); + + $cmd = 'echo ' . escapeshellarg(json_encode($cmd_args)) + . 
' | arc call-conduit differential.updateunitresults'; + + return $cmd; +} + +function updateTestStatus($diffID, $test) { + assert(strlen($diffID) > 0); + assert(is_numeric($diffID)); + assert(strlen($test) > 0); + + shell_exec(buildUpdateTestStatusCmd($diffID, $test, "waiting")); +} + +function getSteps($applyDiff, $diffID, $username, $test) { + assert(strlen($username) > 0); + assert(strlen($test) > 0); + + if ($applyDiff) { + assert(strlen($diffID) > 0); + assert(is_numeric($diffID)); + + $arcrc_content = (PHP_OS == "Darwin" ? + exec("cat ~/.arcrc | gzip -f | base64") : + exec("cat ~/.arcrc | gzip -f | base64 -w0")); + assert(strlen($arcrc_content) > 0); + + // Sandcastle machines don't have arc setup. We copy the user certificate + // and authenticate using that in Sandcastle. + $setup = array( + "name" => "Setup arcrc", + "shell" => "echo " . escapeshellarg($arcrc_content) . " | base64 --decode" + . " | gzip -d > ~/.arcrc", + "user" => "root" + ); + + // arc demands certain permission on its config. + // also fix the sticky bit issue in sandcastle + $fix_permission = array( + "name" => "Fix environment", + "shell" => "chmod 600 ~/.arcrc && chmod +t /dev/shm", + "user" => "root" + ); + + // Construct the steps in the order of execution. + $steps[] = $setup; + $steps[] = $fix_permission; + } + + // fbcode is a sub-repo. We cannot patch until we add it to ignore otherwise + // Git thinks it is an uncommitted change. 
+ $fix_git_ignore = array( + "name" => "Fix git ignore", + "shell" => "echo fbcode >> .git/info/exclude", + "user" => "root" + ); + + // This fixes "FATAL: ThreadSanitizer can not mmap the shadow memory" + // Source: + // https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#FAQ + $fix_kernel_issue = array( + "name" => "Fix kernel issue with tsan", + "shell" => "echo 2 >/proc/sys/kernel/randomize_va_space", + "user" => "root" + ); + + $steps[] = $fix_git_ignore; + $steps[] = $fix_kernel_issue; + + // This will be the command used to execute particular type of tests. + $cmd = ""; + + if ($applyDiff) { + // Patch the code (keep your fingers crossed). + $patch = array( + "name" => "Patch " . $diffID, + "shell" => "arc --arcrc-file ~/.arcrc " + . "patch --nocommit --diff " . escapeshellarg($diffID), + "user" => "root" + ); + + $steps[] = $patch; + + updateTestStatus($diffID, $test); + $cmd = buildUpdateTestStatusCmd($diffID, $test, "running") . "; "; + } + + // Run the actual command. + $cmd = $cmd . "J=$(nproc) ./build_tools/precommit_checker.py " . + escapeshellarg($test) . "; exit_code=$?; "; + + if ($applyDiff) { + $cmd = $cmd . "([[ \$exit_code -eq 0 ]] &&" + . buildUpdateTestStatusCmd($diffID, $test, "pass") . ")" + . "||" . buildUpdateTestStatusCmd($diffID, $test, "fail") + . "; "; + } + + // shell command to sort the tests based on exit code and print + // the output of the log files. + $cat_sorted_logs = " + while read code log_file; + do echo \"################ cat \$log_file [exit_code : \$code] ################\"; + cat \$log_file; + done < <(tail -n +2 LOG | sort -k7,7n -k4,4gr | awk '{print \$7,\$NF}')"; + + // Shell command to cat all log files + $cat_all_logs = "for f in `ls t/!(run-*)`; do echo \$f;cat \$f; done"; + + // If LOG file exists use it to cat log files sorted by exit code, otherwise + // cat everything + $logs_cmd = "if [ -f LOG ]; then {$cat_sorted_logs}; else {$cat_all_logs}; fi"; + + $cmd = $cmd .
" cat /tmp/precommit-check.log" + . "; shopt -s extglob; {$logs_cmd}" + . "; shopt -u extglob; [[ \$exit_code -eq 0 ]]"; + assert(strlen($cmd) > 0); + + $run_test = array( + "name" => "Run " . $test, + "shell" => $cmd, + "user" => "root", + "parser" => "python build_tools/error_filter.py " . escapeshellarg($test), + ); + + $steps[] = $run_test; + + if ($applyDiff) { + // Clean up the user arc config we are using. + $cleanup = array( + "name" => "Arc cleanup", + "shell" => "rm -f ~/.arcrc", + "user" => "root" + ); + + $steps[] = $cleanup; + } + + assert(count($steps) > 0); + return $steps; +} + +function getSandcastleConfig() { + $sandcastle_config = array(); + + $cwd = getcwd(); + $cwd_token_file = "{$cwd}/.sandcastle"; + // This is a case when we're executed from a continuous run. Fetch the values + // from the environment. + if (getenv(ENV_POST_RECEIVE_HOOK)) { + $sandcastle_config[0] = getenv(ENV_HTTPS_APP_VALUE); + $sandcastle_config[1] = getenv(ENV_HTTPS_TOKEN_VALUE); + } else { + // This is a typical `[p]arc diff` case. Fetch the values from the specific + // configuration files. + for ($i = 0; $i < 50; $i++) { + if (file_exists(PRIMARY_TOKEN_FILE) || + file_exists($cwd_token_file)) { + break; + } + // If we failed to fetch the tokens, sleep for 0.2 second and try again + usleep(200000); + } + assert(file_exists(PRIMARY_TOKEN_FILE) || + file_exists($cwd_token_file)); + + // Try the primary location first, followed by a secondary. + if (file_exists(PRIMARY_TOKEN_FILE)) { + $cmd = 'cat ' . PRIMARY_TOKEN_FILE; + } else { + $cmd = 'cat ' . escapeshellarg($cwd_token_file); + } + + assert(strlen($cmd) > 0); + $sandcastle_config = explode(':', rtrim(shell_exec($cmd))); + } + + // In this case be very explicit about the implications. + if (count($sandcastle_config) != 2) { + echo "Sandcastle configuration files don't contain valid information " . + "or the necessary environment variables aren't defined. Unable " . 
+ "to validate the code changes."; + exit(1); + } + + assert(strlen($sandcastle_config[0]) > 0); + assert(strlen($sandcastle_config[1]) > 0); + assert(count($sandcastle_config) > 0); + + return $sandcastle_config; +} + +// This function can be called either from `[p]arc diff` command or during +// the Git post-receive hook. + function startTestsInSandcastle($applyDiff, $workflow, $diffID) { + // Default options don't terminate on failure, but that's what we want. In + // the current case we use assertions intentionally as "terminate on failure + // invariants". + assert_options(ASSERT_BAIL, true); + + // In case of a diff we'll send notifications to the author. Else it'll go to + // the entire team because failures indicate that build quality has regressed. + $username = $applyDiff ? exec("whoami") : CONT_RUN_ALIAS; + assert(strlen($username) > 0); + + if ($applyDiff) { + assert($workflow); + assert(strlen($diffID) > 0); + assert(is_numeric($diffID)); + } + + // List of tests we want to run in Sandcastle. + $tests = array("unit", "unit_non_shm", "unit_481", "clang_unit", "tsan", + "asan", "lite_test", "valgrind", "release", "release_481", + "clang_release", "clang_analyze", "code_cov", + "java_build", "no_compression", "unity", "ubsan"); + + $send_email_template = array( + 'type' => 'email', + 'triggers' => array('fail'), + 'emails' => array($username . '@fb.com'), + ); + + // Construct a job definition for each test and add it to the master plan. + foreach ($tests as $test) { + $stepName = "RocksDB diff " . $diffID . " test " . $test; + + if (!$applyDiff) { + $stepName = "RocksDB continuous integration test " . $test; + } + + $arg[] = array( + "name" => $stepName, + "report" => array($send_email_template), + "steps" => getSteps($applyDiff, $diffID, $username, $test) + ); + } + + // We cannot submit the parallel execution master plan to Sandcastle and + // need to supply the job plan as a determinator.
So we construct a small job + // that will spit out the master job plan which Sandcastle will parse and + // execute. Why compress the job definitions? Otherwise we run over the max + // string size. + $cmd = "echo " . base64_encode(json_encode($arg)) + . (PHP_OS == "Darwin" ? + " | gzip -f | base64" : + " | gzip -f | base64 -w0"); + assert(strlen($cmd) > 0); + + $arg_encoded = shell_exec($cmd); + assert(strlen($arg_encoded) > 0); + + $runName = "Run diff " . $diffID . "for user " . $username; + + if (!$applyDiff) { + $runName = "RocksDB continuous integration build and test run"; + } + + $command = array( + "name" => $runName, + "steps" => array() + ); + + $command["steps"][] = array( + "name" => "Generate determinator", + "shell" => "echo " . $arg_encoded . " | base64 --decode | gzip -d" + . " | base64 --decode", + "determinator" => true, + "user" => "root" + ); + + // Submit to Sandcastle. + $url = 'https://interngraph.intern.facebook.com/sandcastle/create'; + + $job = array( + 'command' => 'SandcastleUniversalCommand', + 'args' => $command, + 'capabilities' => array( + 'vcs' => 'rocksdb-int-git', + 'type' => 'lego', + ), + 'hash' => 'origin/master', + 'user' => $username, + 'alias' => 'rocksdb-precommit', + 'tags' => array('rocksdb'), + 'description' => 'Rocksdb precommit job', + ); + + // Fetch the configuration necessary to submit a successful HTTPS request. + $sandcastle_config = getSandcastleConfig(); + + $app = $sandcastle_config[0]; + $token = $sandcastle_config[1]; + + $cmd = 'curl -s -k ' + . ' -F app=' . escapeshellarg($app) + . ' -F token=' . escapeshellarg($token) + . ' -F job=' . escapeshellarg(json_encode($job)) + .' ' . escapeshellarg($url); + + $output = shell_exec($cmd); + assert(strlen($output) > 0); + + // Extract Sandcastle URL from the response. 
+ preg_match('/url": "(.+)"/', $output, $sandcastle_url); + + assert(count($sandcastle_url) > 0, "Unable to submit Sandcastle request."); + assert(strlen($sandcastle_url[1]) > 0, "Unable to extract Sandcastle URL."); + + if ($applyDiff) { + echo "\nSandcastle URL: " . $sandcastle_url[1] . "\n"; + // Ask Phabricator to display it on the diff UI. + postURL($diffID, $sandcastle_url[1]); + } else { + echo "Continuous integration started Sandcastle tests. You can look at "; + echo "the progress at:\n" . $sandcastle_url[1] . "\n"; + } +} + +// Continuous run script will set the environment variable and based on that +// we'll trigger the execution of tests in Sandcastle. In that case we don't +// need to apply any diffs and there's no associated workflow either. +if (getenv(ENV_POST_RECEIVE_HOOK)) { + startTestsInSandcastle( + false /* $applyDiff */, + NULL /* $workflow */, + NULL /* $diffID */); +} diff --git a/src/rocksdb/build_tools/amalgamate.py b/src/rocksdb/build_tools/amalgamate.py new file mode 100755 index 00000000..548b1e8c --- /dev/null +++ b/src/rocksdb/build_tools/amalgamate.py @@ -0,0 +1,110 @@ +#!/usr/bin/python + +# amalgamate.py creates an amalgamation from a unity build. +# It can be run with either Python 2 or 3. +# An amalgamation consists of a header that includes the contents of all public +# headers and a source file that includes the contents of all source files and +# private headers. +# +# This script works by starting with the unity build file and recursively expanding +# #include directives. If the #include is found in a public include directory, +# that header is expanded into the amalgamation header. +# +# A particular header is only expanded once, so this script will +# break if there are multiple inclusions of the same header that are expected to +# expand differently.
Similarly, this type of code causes issues: +# +# #ifdef FOO +# #include "bar.h" +# // code here +# #else +# #include "bar.h" // oops, doesn't get expanded +# // different code here +# #endif +# +# The solution is to move the include out of the #ifdef. + +from __future__ import print_function + +import argparse +from os import path +import re +import sys + +include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$') +included = set() +excluded = set() + +def find_header(name, abs_path, include_paths): + samedir = path.join(path.dirname(abs_path), name) + if path.exists(samedir): + return samedir + for include_path in include_paths: + include_path = path.join(include_path, name) + if path.exists(include_path): + return include_path + return None + +def expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths): + if include_path in included: + return False + + included.add(include_path) + with open(include_path) as f: + print('#line 1 "{}"'.format(include_path), file=source_out) + process_file(f, include_path, source_out, header_out, include_paths, public_include_paths) + return True + +def process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths): + for (line, text) in enumerate(f): + m = include_re.match(text) + if m: + filename = m.groups()[0] + # first check private headers + include_path = find_header(filename, abs_path, include_paths) + if include_path: + if include_path in excluded: + source_out.write(text) + expanded = False + else: + expanded = expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths) + else: + # now try public headers + include_path = find_header(filename, abs_path, public_include_paths) + if include_path: + # found public header + expanded = False + if include_path in excluded: + source_out.write(text) + else: + expand_include(include_path, f, abs_path, header_out, None, public_include_paths, []) + else: + 
sys.exit("unable to find {}, included in {} on line {}".format(filename, abs_path, line)) + + if expanded: + print('#line {} "{}"'.format(line+1, abs_path), file=source_out) + elif text != "#pragma once\n": + source_out.write(text) + +def main(): + parser = argparse.ArgumentParser(description="Transform a unity build into an amalgamation") + parser.add_argument("source", help="source file") + parser.add_argument("-I", action="append", dest="include_paths", help="include paths for private headers") + parser.add_argument("-i", action="append", dest="public_include_paths", help="include paths for public headers") + parser.add_argument("-x", action="append", dest="excluded", help="excluded header files") + parser.add_argument("-o", dest="source_out", help="output C++ file", required=True) + parser.add_argument("-H", dest="header_out", help="output C++ header file", required=True) + args = parser.parse_args() + + include_paths = list(map(path.abspath, args.include_paths or [])) + public_include_paths = list(map(path.abspath, args.public_include_paths or [])) + excluded.update(map(path.abspath, args.excluded or [])) + filename = args.source + abs_path = path.abspath(filename) + with open(filename) as f, open(args.source_out, 'w') as source_out, open(args.header_out, 'w') as header_out: + print('#line 1 "{}"'.format(filename), file=source_out) + print('#include "{}"'.format(header_out.name), file=source_out) + process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths) + +if __name__ == "__main__": + main() diff --git a/src/rocksdb/build_tools/build_detect_platform b/src/rocksdb/build_tools/build_detect_platform new file mode 100755 index 00000000..057f77ec --- /dev/null +++ b/src/rocksdb/build_tools/build_detect_platform @@ -0,0 +1,642 @@ +#!/usr/bin/env bash +# +# Detects OS we're compiling on and outputs a file specified by the first +# argument, which in turn gets read while processing Makefile. 
+# +# The output will set the following variables: +# CC C Compiler path +# CXX C++ Compiler path +# PLATFORM_LDFLAGS Linker flags +# JAVA_LDFLAGS Linker flags for RocksDBJava +# JAVA_STATIC_LDFLAGS Linker flags for RocksDBJava static build +# PLATFORM_SHARED_EXT Extension for shared libraries +# PLATFORM_SHARED_LDFLAGS Flags for building shared library +# PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library +# PLATFORM_CCFLAGS C compiler flags +# PLATFORM_CXXFLAGS C++ compiler flags. Will contain: +# PLATFORM_SHARED_VERSIONED Set to 'true' if platform supports versioned +# shared libraries, empty otherwise. +# FIND Command for the find utility +# WATCH Command for the watch utility +# +# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following: +# +# -DROCKSDB_PLATFORM_POSIX if posix-platform based +# -DSNAPPY if the Snappy library is present +# -DLZ4 if the LZ4 library is present +# -DZSTD if the ZSTD library is present +# -DNUMA if the NUMA library is present +# -DTBB if the TBB library is present +# +# Using gflags in rocksdb: +# Our project depends on gflags, which requires users to take some extra steps +# before they can compile the whole repository: +# 1. Install gflags. You may download it from here: +# https://gflags.github.io/gflags/ (Mac users can `brew install gflags`) +# 2. Once installed, add the include path for gflags to your CPATH env var and +# the lib path to LIBRARY_PATH. 
If installed with default settings, the lib +# will be /usr/local/lib and the include path will be /usr/local/include + +OUTPUT=$1 +if test -z "$OUTPUT"; then + echo "usage: $0 <output-filename>" >&2 + exit 1 +fi + +# we depend on C++11 +PLATFORM_CXXFLAGS="-std=c++11" +# we currently depend on POSIX platform +COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX" + +# Default to fbcode gcc on internal fb machines +if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then + FBCODE_BUILD="true" + # If we're compiling with TSAN we need pic build + PIC_BUILD=$COMPILE_WITH_TSAN + if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then + # we need this to build with MySQL. Don't use for other purposes. + source "$PWD/build_tools/fbcode_config4.8.1.sh" + elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007" ]; then + source "$PWD/build_tools/fbcode_config_platform007.sh" + else + source "$PWD/build_tools/fbcode_config.sh" + fi +fi + +# Delete existing output, if it exists +rm -f "$OUTPUT" +touch "$OUTPUT" + +if test -z "$CC"; then + if [ -x "$(command -v cc)" ]; then + CC=cc + elif [ -x "$(command -v clang)" ]; then + CC=clang + else + CC=cc + fi +fi + +if test -z "$CXX"; then + if [ -x "$(command -v g++)" ]; then + CXX=g++ + elif [ -x "$(command -v clang++)" ]; then + CXX=clang++ + else + CXX=g++ + fi +fi + +# Detect OS +if test -z "$TARGET_OS"; then + TARGET_OS=`uname -s` +fi + +if test -z "$TARGET_ARCHITECTURE"; then + TARGET_ARCHITECTURE=`uname -m` +fi + +if test -z "$CLANG_SCAN_BUILD"; then + CLANG_SCAN_BUILD=scan-build +fi + +if test -z "$CLANG_ANALYZER"; then + CLANG_ANALYZER=$(command -v clang++ 2> /dev/null) +fi + +if test -z "$FIND"; then + FIND=find +fi + +if test -z "$WATCH"; then + WATCH=watch +fi + +COMMON_FLAGS="$COMMON_FLAGS ${CFLAGS}" +CROSS_COMPILE= +PLATFORM_CCFLAGS= +PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS" +PLATFORM_SHARED_EXT="so" +PLATFORM_SHARED_LDFLAGS="-Wl,--no-as-needed -shared -Wl,-soname -Wl," +PLATFORM_SHARED_CFLAGS="-fPIC" 
+PLATFORM_SHARED_VERSIONED=true + +# generic port files (working on all platform by #ifdef) go directly in /port +GENERIC_PORT_FILES=`cd "$ROCKSDB_ROOT"; find port -name '*.cc' | tr "\n" " "` + +# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp +case "$TARGET_OS" in + Darwin) + PLATFORM=OS_MACOSX + COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX" + PLATFORM_SHARED_EXT=dylib + PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " + # PORT_FILES=port/darwin/darwin_specific.cc + ;; + IOS) + PLATFORM=IOS + COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX -DIOS_CROSS_COMPILE -DROCKSDB_LITE" + PLATFORM_SHARED_EXT=dylib + PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " + CROSS_COMPILE=true + PLATFORM_SHARED_VERSIONED= + ;; + Linux) + PLATFORM=OS_LINUX + COMMON_FLAGS="$COMMON_FLAGS -DOS_LINUX" + if [ -z "$USE_CLANG" ]; then + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" + else + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" + fi + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" + # PORT_FILES=port/linux/linux_specific.cc + ;; + SunOS) + PLATFORM=OS_SOLARIS + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS -m64" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -static-libstdc++ -static-libgcc -m64" + # PORT_FILES=port/sunos/sunos_specific.cc + ;; + AIX) + PLATFORM=OS_AIX + CC=gcc + COMMON_FLAGS="$COMMON_FLAGS -maix64 -pthread -fno-builtin-memcmp -D_REENTRANT -DOS_AIX -D__STDC_FORMAT_MACROS" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread -lpthread -lrt -maix64 -static-libstdc++ -static-libgcc" + # PORT_FILES=port/aix/aix_specific.cc + ;; + FreeBSD) + PLATFORM=OS_FREEBSD + CXX=clang++ + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" + # PORT_FILES=port/freebsd/freebsd_specific.cc + ;; + NetBSD) + PLATFORM=OS_NETBSD + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lgcc_s" + # 
PORT_FILES=port/netbsd/netbsd_specific.cc + ;; + OpenBSD) + PLATFORM=OS_OPENBSD + CXX=clang++ + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread" + # PORT_FILES=port/openbsd/openbsd_specific.cc + FIND=gfind + WATCH=gnuwatch + ;; + DragonFly) + PLATFORM=OS_DRAGONFLYBSD + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" + # PORT_FILES=port/dragonfly/dragonfly_specific.cc + ;; + Cygwin) + PLATFORM=CYGWIN + PLATFORM_SHARED_CFLAGS="" + PLATFORM_CXXFLAGS="-std=gnu++11" + COMMON_FLAGS="$COMMON_FLAGS -DCYGWIN" + if [ -z "$USE_CLANG" ]; then + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" + else + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" + fi + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" + # PORT_FILES=port/linux/linux_specific.cc + ;; + OS_ANDROID_CROSSCOMPILE) + PLATFORM=OS_ANDROID + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DROCKSDB_PLATFORM_POSIX" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS " # All pthread features are in the Android C library + # PORT_FILES=port/android/android.cc + CROSS_COMPILE=true + ;; + *) + echo "Unknown platform!" >&2 + exit 1 +esac + +PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}" +JAVA_LDFLAGS="$PLATFORM_LDFLAGS" +JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS" + +if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then + # Cross-compiling; do not try any compilation tests. + # Also don't need any compilation tests if compiling on fbcode + true +else + if ! test $ROCKSDB_DISABLE_FALLOCATE; then + # Test whether fallocate is available + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <fcntl.h> + #include <linux/falloc.h> + int main() { + int fd = open("/dev/null", 0); + fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024); + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT" + fi + fi + + if ! 
test $ROCKSDB_DISABLE_SNAPPY; then + # Test whether Snappy library is installed + # http://code.google.com/p/snappy/ + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <snappy.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lsnappy" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lsnappy" + fi + fi + + if ! test $ROCKSDB_DISABLE_GFLAGS; then + # Test whether gflags library is installed + # http://gflags.github.io/gflags/ + # check if the namespace is gflags + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF + #include <gflags/gflags.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" + else + # check if namespace is google + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF + #include <gflags/gflags.h> + using namespace google; + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=google" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" + fi + fi + fi + + if ! test $ROCKSDB_DISABLE_ZLIB; then + # Test whether zlib library is installed + $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <zlib.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DZLIB" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lz" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lz" + fi + fi + + if ! test $ROCKSDB_DISABLE_BZIP; then + # Test whether bzip library is installed + $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <bzlib.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DBZIP2" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbz2" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lbz2" + fi + fi + + if ! test $ROCKSDB_DISABLE_LZ4; then + # Test whether lz4 library is installed + $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <lz4.h> + #include <lz4hc.h> + int main() {} +EOF + if [ "$?" 
= 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DLZ4" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -llz4" + JAVA_LDFLAGS="$JAVA_LDFLAGS -llz4" + fi + fi + + if ! test $ROCKSDB_DISABLE_ZSTD; then + # Test whether zstd library is installed + $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <zstd.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DZSTD" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lzstd" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lzstd" + fi + fi + + if ! test $ROCKSDB_DISABLE_NUMA; then + # Test whether numa is available + $CXX $CFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null <<EOF + #include <numa.h> + #include <numaif.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DNUMA" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lnuma" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lnuma" + fi + fi + + if ! test $ROCKSDB_DISABLE_TBB; then + # Test whether tbb is available + $CXX $CFLAGS $LDFLAGS -x c++ - -o /dev/null -ltbb 2>/dev/null <<EOF + #include <tbb/tbb.h> + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DTBB" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltbb" + JAVA_LDFLAGS="$JAVA_LDFLAGS -ltbb" + fi + fi + + if ! 
test $ROCKSDB_DISABLE_JEMALLOC; then + # Test whether jemalloc is available + if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null -ljemalloc \ + 2>/dev/null; then + # This will enable some preprocessor identifiers in the Makefile + JEMALLOC=1 + # JEMALLOC can be enabled either using the flag (like here) or by + # providing direct link to the jemalloc library + WITH_JEMALLOC_FLAG=1 + # check for JEMALLOC installed with HomeBrew + if [ "$PLATFORM" == "OS_MACOSX" ]; then + if hash brew 2>/dev/null && brew ls --versions jemalloc > /dev/null; then + JEMALLOC_VER=$(brew ls --versions jemalloc | tail -n 1 | cut -f 2 -d ' ') + JEMALLOC_INCLUDE="-I/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/include" + JEMALLOC_LIB="/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/lib/libjemalloc_pic.a" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $JEMALLOC_LIB" + JAVA_STATIC_LDFLAGS="$JAVA_STATIC_LDFLAGS $JEMALLOC_LIB" + fi + fi + fi + fi + if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then + # jemalloc is not available. Let's try tcmalloc + if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null \ + -ltcmalloc 2>/dev/null; then + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc" + JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc" + fi + fi + + if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then + # Test whether malloc_usable_size is available + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <malloc.h> + int main() { + size_t res = malloc_usable_size(0); + return 0; + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_MALLOC_USABLE_SIZE" + fi + fi + + if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then + # Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <pthread.h> + int main() { + int x = PTHREAD_MUTEX_ADAPTIVE_NP; + return 0; + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX" + fi + fi + + if ! 
test $ROCKSDB_DISABLE_BACKTRACE; then + # Test whether backtrace is available + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <execinfo.h>> + int main() { + void* frames[1]; + backtrace_symbols(frames, backtrace(frames, 1)); + return 0; + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" + else + # Test whether execinfo library is installed + $CXX $CFLAGS -lexecinfo -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <execinfo.h> + int main() { + void* frames[1]; + backtrace_symbols(frames, backtrace(frames, 1)); + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lexecinfo" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lexecinfo" + fi + fi + fi + + if ! test $ROCKSDB_DISABLE_PG; then + # Test if -pg is supported + $CXX $CFLAGS -pg -x c++ - -o /dev/null 2>/dev/null <<EOF + int main() { + return 0; + } +EOF + if [ "$?" = 0 ]; then + PROFILING_FLAGS=-pg + fi + fi + + if ! test $ROCKSDB_DISABLE_SYNC_FILE_RANGE; then + # Test whether sync_file_range is supported for compatibility with an old glibc + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <fcntl.h> + int main() { + int fd = open("/dev/null", 0); + sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE); + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_RANGESYNC_PRESENT" + fi + fi + + if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then + # Test whether sched_getcpu is supported + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <sched.h> + int main() { + int cpuid = sched_getcpu(); + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SCHED_GETCPU_PRESENT" + fi + fi + + if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then + # Test whether c++17 aligned-new is supported + $CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o /dev/null 2>/dev/null <<EOF + struct alignas(1024) t {int a;}; + int main() {} +EOF + if [ "$?" 
= 0 ]; then + PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS -faligned-new -DHAVE_ALIGNED_NEW" + fi + fi +fi + +# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning. +# -Wshorten-64-to-32 breaks compilation on FreeBSD i386 +if ! [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then + # Test whether -Wshorten-64-to-32 is available + $CXX $CFLAGS -x c++ - -o /dev/null -Wshorten-64-to-32 2>/dev/null <<EOF + int main() {} +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -Wshorten-64-to-32" + fi +fi + +# shall we use HDFS? + +if test "$USE_HDFS"; then + if test -z "$JAVA_HOME"; then + echo "JAVA_HOME has to be set for HDFS usage." + exit 1 + fi + HDFS_CCFLAGS="$HDFS_CCFLAGS -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -DUSE_HDFS -I$HADOOP_HOME/include" + HDFS_LDFLAGS="$HDFS_LDFLAGS -lhdfs -L$JAVA_HOME/jre/lib/amd64 -L$HADOOP_HOME/lib/native" + HDFS_LDFLAGS="$HDFS_LDFLAGS -L$JAVA_HOME/jre/lib/amd64/server -L$GLIBC_RUNTIME_PATH/lib" + HDFS_LDFLAGS="$HDFS_LDFLAGS -ldl -lverify -ljava -ljvm" + COMMON_FLAGS="$COMMON_FLAGS $HDFS_CCFLAGS" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $HDFS_LDFLAGS" + JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS" +fi + +if test -z "$PORTABLE"; then + if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then + # Tune for this POWER processor, treating '+' models as base models + POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+` + COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER " + elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then + COMMON_FLAGS="$COMMON_FLAGS -march=z10 " + elif test -n "`echo $TARGET_ARCHITECTURE | grep ^arm`"; then + # TODO: Handle this with appropriate options.
+ COMMON_FLAGS="$COMMON_FLAGS" + elif [ "$TARGET_OS" == "IOS" ]; then + COMMON_FLAGS="$COMMON_FLAGS" + elif [ "$TARGET_OS" != "AIX" ] && [ "$TARGET_OS" != "SunOS" ]; then + COMMON_FLAGS="$COMMON_FLAGS -march=native " + elif test "$USE_SSE"; then + COMMON_FLAGS="$COMMON_FLAGS -msse4.2 -mpclmul" + fi +elif test "$USE_SSE"; then + COMMON_FLAGS="$COMMON_FLAGS -msse4.2 -mpclmul" +fi + +$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <cstdint> + #include <nmmintrin.h> + int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); + } +EOF +if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DHAVE_SSE42" +elif test "$USE_SSE"; then + echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" + exit 1 +fi + +$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #include <cstdint> + #include <wmmintrin.h> + int main() { + const auto a = _mm_set_epi64x(0, 0); + const auto b = _mm_set_epi64x(0, 0); + const auto c = _mm_clmulepi64_si128(a, b, 0x00); + auto d = _mm_cvtsi128_si64(c); + } +EOF +if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DHAVE_PCLMUL" +elif test "$USE_SSE"; then + echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" + exit 1 +fi + +# iOS doesn't support thread-local storage, but this check would erroneously +# succeed because the cross-compiler flags are added by the Makefile, not this +# script. +if [ "$PLATFORM" != IOS ]; then + $CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + #if defined(_MSC_VER) && !defined(__thread) + #define __thread __declspec(thread) + #endif + int main() { + static __thread int tls; + } +EOF + if [ "$?" 
= 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL" + fi +fi + +PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" +PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS" + +VALGRIND_VER="$VALGRIND_VER" + +ROCKSDB_MAJOR=`build_tools/version.sh major` +ROCKSDB_MINOR=`build_tools/version.sh minor` +ROCKSDB_PATCH=`build_tools/version.sh patch` + +echo "CC=$CC" >> "$OUTPUT" +echo "CXX=$CXX" >> "$OUTPUT" +echo "PLATFORM=$PLATFORM" >> "$OUTPUT" +echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> "$OUTPUT" +echo "JAVA_LDFLAGS=$JAVA_LDFLAGS" >> "$OUTPUT" +echo "JAVA_STATIC_LDFLAGS=$JAVA_STATIC_LDFLAGS" >> "$OUTPUT" +echo "VALGRIND_VER=$VALGRIND_VER" >> "$OUTPUT" +echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> "$OUTPUT" +echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> "$OUTPUT" +echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> "$OUTPUT" +echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> "$OUTPUT" +echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> "$OUTPUT" +echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED" >> "$OUTPUT" +echo "EXEC_LDFLAGS=$EXEC_LDFLAGS" >> "$OUTPUT" +echo "JEMALLOC_INCLUDE=$JEMALLOC_INCLUDE" >> "$OUTPUT" +echo "JEMALLOC_LIB=$JEMALLOC_LIB" >> "$OUTPUT" +echo "ROCKSDB_MAJOR=$ROCKSDB_MAJOR" >> "$OUTPUT" +echo "ROCKSDB_MINOR=$ROCKSDB_MINOR" >> "$OUTPUT" +echo "ROCKSDB_PATCH=$ROCKSDB_PATCH" >> "$OUTPUT" +echo "CLANG_SCAN_BUILD=$CLANG_SCAN_BUILD" >> "$OUTPUT" +echo "CLANG_ANALYZER=$CLANG_ANALYZER" >> "$OUTPUT" +echo "PROFILING_FLAGS=$PROFILING_FLAGS" >> "$OUTPUT" +echo "FIND=$FIND" >> "$OUTPUT" +echo "WATCH=$WATCH" >> "$OUTPUT" +# This will enable some related identifiers for the preprocessor +if test -n "$JEMALLOC"; then + echo "JEMALLOC=1" >> "$OUTPUT" +fi +# Indicates that jemalloc should be enabled using -ljemalloc flag +# The alternative is to porvide a direct link to the library via JEMALLOC_LIB +# and JEMALLOC_INCLUDE +if test -n "$WITH_JEMALLOC_FLAG"; then + echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT" 
+fi +echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT" diff --git a/src/rocksdb/build_tools/cont_integration.sh b/src/rocksdb/build_tools/cont_integration.sh new file mode 100755 index 00000000..66d25522 --- /dev/null +++ b/src/rocksdb/build_tools/cont_integration.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2016, Facebook. All rights reserved. +# +# Overall wrapper script for RocksDB continuous builds. The implementation is a +# trivial pulling scheme. We loop infinitely, check if any new changes have been +# committed, if yes then trigger a Sandcastle run, and finally go to sleep again +# for a certain interval. +# + +SRC_GIT_REPO=/data/git/rocksdb-public +error=0 + +function log { + DATE=`date +%Y-%m-%d:%H:%M:%S` + # shellcheck disable=SC2068 + echo $DATE $@ +} + +function log_err { + # shellcheck disable=SC2145 + log "ERROR: $@ Error code: $error." +} + +function update_repo_status { + # Update the parent first. + pushd $SRC_GIT_REPO + + # This is a fatal error. Something in the environment isn't right and we will + # terminate the execution. + error=$? + if [ ! $error -eq 0 ]; then + log_err "Where is $SRC_GIT_REPO?" + exit $error + fi + + HTTPS_PROXY=fwdproxy:8080 git fetch -f + + error=$? + if [ ! $error -eq 0 ]; then + log_err "git fetch -f failed." + popd + return $error + fi + + git update-ref refs/heads/master refs/remotes/origin/master + + error=$? + if [ ! $error -eq 0 ]; then + log_err "git update-ref failed." + popd + return $error + fi + + popd + + # We're back in an instance-specific directory. Get the latest changes. + git pull --rebase + + error=$? + if [ ! $error -eq 0 ]; then + log_err "git pull --rebase failed." + return $error + fi +} + +# +# Execution starts here. +# + +# Path to the determinator from the root of the RocksDB repo. +CONTRUN_DETERMINATOR=./build_tools/RocksDBCommonHelper.php + +# Value of the previous commit. +PREV_COMMIT= + +log "Starting to monitor for new RocksDB changes ..." +log "Running under `pwd` as `whoami`." 
+ +# Paranoia. Make sure that we're using the right branch. +git checkout master + +error=$? +if [ ! $error -eq 0 ]; then + log_err "This is not good. Can't checkout master. Bye-bye!" + exit 1 +fi + +# We'll run forever and let the execution environment terminate us if we'll +# exceed whatever timeout is set for the job. +while true; +do + # Get the latest changes committed. + update_repo_status + + error=$? + if [ $error -eq 0 ]; then + LAST_COMMIT=`git log -1 | head -1 | grep commit | awk '{ print $2; }'` + + log "Last commit is '$LAST_COMMIT', previous commit is '$PREV_COMMIT'." + + if [ "$PREV_COMMIT" == "$LAST_COMMIT" ]; then + log "There were no changes since the last time I checked. Going to sleep." + else + if [ ! -z "$LAST_COMMIT" ]; then + log "New code has been committed or previous commit not known. " \ + "Will trigger the tests." + + PREV_COMMIT=$LAST_COMMIT + log "Updated previous commit to '$PREV_COMMIT'." + + # + # This is where we'll trigger the Sandcastle run. The values for + # HTTPS_APP_VALUE and HTTPS_APP_VALUE will be set in the container we're + # running in. + # + POST_RECEIVE_HOOK=1 php $CONTRUN_DETERMINATOR + + error=$? + if [ $error -eq 0 ]; then + log "Sandcastle run successfully triggered." + else + log_err "Failed to trigger Sandcastle run." + fi + else + log_err "Previous commit not updated. Don't know what the last one is." + fi + fi + else + log_err "Getting latest changes failed. Will skip running tests for now." + fi + + # Always sleep, even if errors happens while trying to determine the latest + # commit. This will prevent us terminating in case of transient errors. + log "Will go to sleep for 5 minutes." 
+ sleep 5m +done diff --git a/src/rocksdb/build_tools/dependencies.sh b/src/rocksdb/build_tools/dependencies.sh new file mode 100644 index 00000000..868e0bbd --- /dev/null +++ b/src/rocksdb/build_tools/dependencies.sh @@ -0,0 +1,18 @@ +GCC_BASE=/mnt/gvfs/third-party2/gcc/112ec378fec7002ad3e09afde022e656049f7191/5.x/centos7-native/c447969 +CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/04999bdb3ce81a11073535dcb00b5e13dc1cbaf5/stable/centos7-native/c9f9104 +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/92b0c8e5c8eecc71eb042594ce1ab3413799b385/5.x/gcc-5-glibc-2.23/339d858 +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/3d8698d5973ba94f41620a80a67e4457fdf01e90/2.23/gcc-5-glibc-2.23/ca1d1c0 +SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/gcc-5-glibc-2.23/9bc6787 +ZLIB_BASE=/mnt/gvfs/third-party2/zlib/22c2d65676fb7c23cfa797c4f6937f38b026f3cf/1.2.8/gcc-5-glibc-2.23/9bc6787 +BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/gcc-5-glibc-2.23/9bc6787 +LZ4_BASE=/mnt/gvfs/third-party2/lz4/907b498203d297947f3bb70b9466f47e100f1873/r131/gcc-5-glibc-2.23/9bc6787 +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/af6628a46758f1a15484a1760cd7294164bc5ba1/1.3.5/gcc-5-glibc-2.23/03859b5 +GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/gcc-5-glibc-2.23/9bc6787 +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b1a0e56c1e3e6929813a4331ade3a58ff083afbb/master/gcc-5-glibc-2.23/aa64d6b +NUMA_BASE=/mnt/gvfs/third-party2/numa/9cbf2460284c669ed19c3ccb200a71f7dd7e53c7/2.0.11/gcc-5-glibc-2.23/9bc6787 +LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/bf3d7497fe4e6d007354f0adffa16ce3003f8338/1.3/gcc-5-glibc-2.23/b443de1 +TBB_BASE=/mnt/gvfs/third-party2/tbb/ff4e0b093534704d8abab678a4fd7f5ea7b094c7/2018_U5/gcc-5-glibc-2.23/9bc6787 +KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/b5c4a61a5c483ba24722005ae07895971a2ac707/4.0.9-36_fbk5_2933_gd092e3f/gcc-5-glibc-2.23/da39a3e 
+BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/55031de95a2b46c82948743419a603b3d6aefe28/2.29.1/centos7-native/da39a3e +VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/f3f697a28122e6bcd513273dd9c1ff23852fc59f/3.13.0/gcc-5-glibc-2.23/9bc6787 +LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.2.3/gcc-5-glibc-2.23/65372bd diff --git a/src/rocksdb/build_tools/dependencies_4.8.1.sh b/src/rocksdb/build_tools/dependencies_4.8.1.sh new file mode 100644 index 00000000..bd02165d --- /dev/null +++ b/src/rocksdb/build_tools/dependencies_4.8.1.sh @@ -0,0 +1,19 @@ +# shellcheck disable=SC2148 +GCC_BASE=/mnt/gvfs/third-party2/gcc/cf7d14c625ce30bae1a4661c2319c5a283e4dd22/4.8.1/centos6-native/cc6c9dc +CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/8598c375b0e94e1448182eb3df034704144a838d/stable/centos6-native/3f16ddd +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/d6e0a7da6faba45f5e5b1638f9edd7afc2f34e7d/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/d282e6e8f3d20f4e40a516834847bdc038e07973/2.17/gcc-4.8.1-glibc-2.17/99df8fc +SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfcb4/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a +ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a +BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a +LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-4.8.1-glibc-2.17/c3f970a +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-4.8.1-glibc-2.17/c3f970a +GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-4.8.1-glibc-2.17/8d31e51 
+NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a +LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/ad576de2a1ea560c4d3434304f0fc4e079bede42/trunk/gcc-4.8.1-glibc-2.17/675d945 +TBB_BASE=/mnt/gvfs/third-party2/tbb/9d9a554877d0c5bef330fe818ab7178806dd316a/4.0_update2/gcc-4.8.1-glibc-2.17/c3f970a +KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/7c111ff27e0c466235163f00f280a9d617c3d2ec/4.0.9-36_fbk5_2933_gd092e3f/gcc-4.8.1-glibc-2.17/da39a3e +BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/b7fd454c4b10c6a81015d4524ed06cdeab558490/2.26/centos6-native/da39a3e +VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d7f4d4d86674a57668e3a96f76f0e17dd0eb8765/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a +LUA_BASE=/mnt/gvfs/third-party2/lua/61e4abf5813bbc39bc4f548757ccfcadde175a48/5.2.3/centos6-native/730f94e diff --git a/src/rocksdb/build_tools/dependencies_platform007.sh b/src/rocksdb/build_tools/dependencies_platform007.sh new file mode 100644 index 00000000..44e9e58f --- /dev/null +++ b/src/rocksdb/build_tools/dependencies_platform007.sh @@ -0,0 +1,18 @@ +GCC_BASE=/mnt/gvfs/third-party2/gcc/6e8e715624fd15256a7970073387793dfcf79b46/7.x/centos7-native/b2ef2b6 +CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/ef37e1faa1c29782abfac1ae65a291b9b7966f6d/stable/centos7-native/c9f9104 +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c67031f0f739ac61575a061518d6ef5038f99f90/7.x/platform007/5620abc +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/60d6f124a78798b73944f5ba87c2306ae3460153/2.26/platform007/f259413 +SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d +ZLIB_BASE=/mnt/gvfs/third-party2/zlib/22c2d65676fb7c23cfa797c4f6937f38b026f3cf/1.2.8/platform007/ca4da3d +BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d +LZ4_BASE=/mnt/gvfs/third-party2/lz4/907b498203d297947f3bb70b9466f47e100f1873/r131/platform007/ca4da3d 
+ZSTD_BASE=/mnt/gvfs/third-party2/zstd/3ee276cbacfad3074e3f07bf826ac47f06970f4e/1.3.5/platform007/15a3614 +GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/9c910d36d6235cc40e8ff559358f1833452300ca/master/platform007/5b0f53e +NUMA_BASE=/mnt/gvfs/third-party2/numa/9cbf2460284c669ed19c3ccb200a71f7dd7e53c7/2.0.11/platform007/ca4da3d +LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/bf3d7497fe4e6d007354f0adffa16ce3003f8338/1.3/platform007/6f3e0a9 +TBB_BASE=/mnt/gvfs/third-party2/tbb/ff4e0b093534704d8abab678a4fd7f5ea7b094c7/2018_U5/platform007/ca4da3d +KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/b5c4a61a5c483ba24722005ae07895971a2ac707/fb/platform007/da39a3e +BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/92ff90349e2f43ea0a8246d8b1cf17b6869013e3/2.29.1/centos7-native/da39a3e +VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/f3f697a28122e6bcd513273dd9c1ff23852fc59f/3.13.0/platform007/ca4da3d +LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832 diff --git a/src/rocksdb/build_tools/dockerbuild.sh b/src/rocksdb/build_tools/dockerbuild.sh new file mode 100755 index 00000000..02f60944 --- /dev/null +++ b/src/rocksdb/build_tools/dockerbuild.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +docker run -v $PWD:/rocks -w /rocks buildpack-deps make diff --git a/src/rocksdb/build_tools/error_filter.py b/src/rocksdb/build_tools/error_filter.py new file mode 100644 index 00000000..5ef1e9c2 --- /dev/null +++ b/src/rocksdb/build_tools/error_filter.py @@ -0,0 +1,174 @@ +# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +# This source code is licensed under both the GPLv2 (found in the +# COPYING file in the root directory) and Apache 2.0 License +# (found in the LICENSE.Apache file in the root directory). 
+ +'''Filter for error messages in test output: + - Receives merged stdout/stderr from test on stdin + - Finds patterns of known error messages for test name (first argument) + - Prints those error messages to stdout +''' + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import re +import sys + + +class ErrorParserBase(object): + def parse_error(self, line): + '''Parses a line of test output. If it contains an error, returns a + formatted message describing the error; otherwise, returns None. + Subclasses must override this method. + ''' + raise NotImplementedError + + +class GTestErrorParser(ErrorParserBase): + '''A parser that remembers the last test that began running so it can print + that test's name upon detecting failure. + ''' + _GTEST_NAME_PATTERN = re.compile(r'\[ RUN \] (\S+)$') + # format: '<filename or "unknown file">:<line #>: Failure' + _GTEST_FAIL_PATTERN = re.compile(r'(unknown file|\S+:\d+): Failure$') + + def __init__(self): + self._last_gtest_name = 'Unknown test' + + def parse_error(self, line): + gtest_name_match = self._GTEST_NAME_PATTERN.match(line) + if gtest_name_match: + self._last_gtest_name = gtest_name_match.group(1) + return None + gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line) + if gtest_fail_match: + return '%s failed: %s' % ( + self._last_gtest_name, gtest_fail_match.group(1)) + return None + + +class MatchErrorParser(ErrorParserBase): + '''A simple parser that returns the whole line if it matches the pattern. 
+ ''' + def __init__(self, pattern): + self._pattern = re.compile(pattern) + + def parse_error(self, line): + if self._pattern.match(line): + return line + return None + + +class CompilerErrorParser(MatchErrorParser): + def __init__(self): + # format (compile error): + # '<filename>:<line #>:<column #>: error: <error msg>' + # format (link error): + # '<filename>:<line #>: error: <error msg>' + # The below regex catches both + super(CompilerErrorParser, self).__init__(r'\S+:\d+: error:') + + +class ScanBuildErrorParser(MatchErrorParser): + def __init__(self): + super(ScanBuildErrorParser, self).__init__( + r'scan-build: \d+ bugs found.$') + + +class DbCrashErrorParser(MatchErrorParser): + def __init__(self): + super(DbCrashErrorParser, self).__init__(r'\*\*\*.*\^$|TEST FAILED.') + + +class WriteStressErrorParser(MatchErrorParser): + def __init__(self): + super(WriteStressErrorParser, self).__init__( + r'ERROR: write_stress died with exitcode=\d+') + + +class AsanErrorParser(MatchErrorParser): + def __init__(self): + super(AsanErrorParser, self).__init__( + r'==\d+==ERROR: AddressSanitizer:') + + +class UbsanErrorParser(MatchErrorParser): + def __init__(self): + # format: '<filename>:<line #>:<column #>: runtime error: <error msg>' + super(UbsanErrorParser, self).__init__(r'\S+:\d+:\d+: runtime error:') + + +class ValgrindErrorParser(MatchErrorParser): + def __init__(self): + # just grab the summary, valgrind doesn't clearly distinguish errors + # from other log messages. 
+ super(ValgrindErrorParser, self).__init__(r'==\d+== ERROR SUMMARY:') + + +class CompatErrorParser(MatchErrorParser): + def __init__(self): + super(CompatErrorParser, self).__init__(r'==== .*[Ee]rror.* ====$') + + +class TsanErrorParser(MatchErrorParser): + def __init__(self): + super(TsanErrorParser, self).__init__(r'WARNING: ThreadSanitizer:') + + +_TEST_NAME_TO_PARSERS = { + 'punit': [CompilerErrorParser, GTestErrorParser], + 'unit': [CompilerErrorParser, GTestErrorParser], + 'release': [CompilerErrorParser, GTestErrorParser], + 'unit_481': [CompilerErrorParser, GTestErrorParser], + 'release_481': [CompilerErrorParser, GTestErrorParser], + 'clang_unit': [CompilerErrorParser, GTestErrorParser], + 'clang_release': [CompilerErrorParser, GTestErrorParser], + 'clang_analyze': [CompilerErrorParser, ScanBuildErrorParser], + 'code_cov': [CompilerErrorParser, GTestErrorParser], + 'unity': [CompilerErrorParser, GTestErrorParser], + 'lite': [CompilerErrorParser], + 'lite_test': [CompilerErrorParser, GTestErrorParser], + 'stress_crash': [CompilerErrorParser, DbCrashErrorParser], + 'stress_crash_with_atomic_flush': [CompilerErrorParser, DbCrashErrorParser], + 'write_stress': [CompilerErrorParser, WriteStressErrorParser], + 'asan': [CompilerErrorParser, GTestErrorParser, AsanErrorParser], + 'asan_crash': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], + 'asan_crash_with_atomic_flush': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], + 'ubsan': [CompilerErrorParser, GTestErrorParser, UbsanErrorParser], + 'ubsan_crash': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], + 'ubsan_crash_with_atomic_flush': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], + 'valgrind': [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser], + 'tsan': [CompilerErrorParser, GTestErrorParser, TsanErrorParser], + 'format_compatible': [CompilerErrorParser, CompatErrorParser], + 'run_format_compatible': [CompilerErrorParser, CompatErrorParser], + 
'no_compression': [CompilerErrorParser, GTestErrorParser], + 'run_no_compression': [CompilerErrorParser, GTestErrorParser], + 'regression': [CompilerErrorParser], + 'run_regression': [CompilerErrorParser], +} + + +def main(): + if len(sys.argv) != 2: + return 'Usage: %s <test name>' % sys.argv[0] + test_name = sys.argv[1] + if test_name not in _TEST_NAME_TO_PARSERS: + return 'Unknown test name: %s' % test_name + + error_parsers = [] + for parser_cls in _TEST_NAME_TO_PARSERS[test_name]: + error_parsers.append(parser_cls()) + + for line in sys.stdin: + line = line.strip() + for error_parser in error_parsers: + error_msg = error_parser.parse_error(line) + if error_msg is not None: + print(error_msg) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/rocksdb/build_tools/fb_compile_mongo.sh b/src/rocksdb/build_tools/fb_compile_mongo.sh new file mode 100755 index 00000000..c087f816 --- /dev/null +++ b/src/rocksdb/build_tools/fb_compile_mongo.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# fail early +set -e + +if test -z $ROCKSDB_PATH; then + ROCKSDB_PATH=~/rocksdb +fi +source $ROCKSDB_PATH/build_tools/fbcode_config4.8.1.sh + +EXTRA_LDFLAGS="" + +if test -z $ALLOC; then + # default + ALLOC=tcmalloc +elif [[ $ALLOC == "jemalloc" ]]; then + ALLOC=system + EXTRA_LDFLAGS+=" -Wl,--whole-archive $JEMALLOC_LIB -Wl,--no-whole-archive" +fi + +# we need to force mongo to use static library, not shared +STATIC_LIB_DEP_DIR='build/static_library_dependencies' +test -d $STATIC_LIB_DEP_DIR || mkdir $STATIC_LIB_DEP_DIR +test -h $STATIC_LIB_DEP_DIR/`basename $SNAPPY_LIBS` || ln -s $SNAPPY_LIBS $STATIC_LIB_DEP_DIR +test -h $STATIC_LIB_DEP_DIR/`basename $LZ4_LIBS` || ln -s $LZ4_LIBS $STATIC_LIB_DEP_DIR + +EXTRA_LDFLAGS+=" -L $STATIC_LIB_DEP_DIR" + +set -x + +EXTRA_CMD="" +if ! 
test -e version.json; then + # this is Mongo 3.0 + EXTRA_CMD="--rocksdb \ + --variant-dir=linux2/norm + --cxx=${CXX} \ + --cc=${CC} \ + --use-system-zlib" # add this line back to normal code path + # when https://jira.mongodb.org/browse/SERVER-19123 is resolved +fi + +scons \ + LINKFLAGS="$EXTRA_LDFLAGS $EXEC_LDFLAGS $PLATFORM_LDFLAGS" \ + CCFLAGS="$CXXFLAGS -L $STATIC_LIB_DEP_DIR" \ + LIBS="lz4 gcc stdc++" \ + LIBPATH="$ROCKSDB_PATH" \ + CPPPATH="$ROCKSDB_PATH/include" \ + -j32 \ + --allocator=$ALLOC \ + --nostrip \ + --opt=on \ + --disable-minimum-compiler-version-enforcement \ + --use-system-snappy \ + --disable-warnings-as-errors \ + $EXTRA_CMD $* diff --git a/src/rocksdb/build_tools/fbcode_config.sh b/src/rocksdb/build_tools/fbcode_config.sh new file mode 100644 index 00000000..f46a580b --- /dev/null +++ b/src/rocksdb/build_tools/fbcode_config.sh @@ -0,0 +1,161 @@ +#!/bin/sh +# +# Set environment variables so that we can compile rocksdb using +# fbcode settings. It uses the latest g++ and clang compilers and also +# uses jemalloc +# Environment variables that change the behavior of this script: +# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. 
libraries that don't have pic variant will not be included + + +BASEDIR=`dirname $BASH_SOURCE` +source "$BASEDIR/dependencies.sh" + +CFLAGS="" + +# libgcc +LIBGCC_INCLUDE="$LIBGCC_BASE/include" +LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" + +# glibc +GLIBC_INCLUDE="$GLIBC_BASE/include" +GLIBC_LIBS=" -L $GLIBC_BASE/lib" + +# snappy +SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" +if test -z $PIC_BUILD; then + SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" +else + SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a" +fi +CFLAGS+=" -DSNAPPY" + +if test -z $PIC_BUILD; then + # location of zlib headers and libraries + ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" + ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" + CFLAGS+=" -DZLIB" + + # location of bzip headers and libraries + BZIP_INCLUDE=" -I $BZIP2_BASE/include/" + BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a" + CFLAGS+=" -DBZIP2" + + LZ4_INCLUDE=" -I $LZ4_BASE/include/" + LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" + CFLAGS+=" -DLZ4" +fi + +ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" +if test -z $PIC_BUILD; then + ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" +else + ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a" +fi +CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY" + +# location of gflags headers and libraries +GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" +if test -z $PIC_BUILD; then + GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" +else + GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a" +fi +CFLAGS+=" -DGFLAGS=gflags" + +# location of jemalloc +JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" +JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a" + +if test -z $PIC_BUILD; then + # location of numa + NUMA_INCLUDE=" -I $NUMA_BASE/include/" + NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" + CFLAGS+=" -DNUMA" + + # location of libunwind + LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" +fi + +# location of TBB +TBB_INCLUDE=" -isystem $TBB_BASE/include/" +if test -z $PIC_BUILD; then + TBB_LIBS="$TBB_BASE/lib/libtbb.a" +else + TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a" +fi +CFLAGS+=" -DTBB" + +# use Intel SSE support for 
checksum calculations +export USE_SSE=1 +export PORTABLE=1 + +BINUTILS="$BINUTILS_BASE/bin" +AR="$BINUTILS/ar" + +DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE" + +STDLIBS="-L $GCC_BASE/lib64" + +CLANG_BIN="$CLANG_BASE/bin" +CLANG_LIB="$CLANG_BASE/lib" +CLANG_SRC="$CLANG_BASE/../../src" + +CLANG_ANALYZER="$CLANG_BIN/clang++" +CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build" + +if [ -z "$USE_CLANG" ]; then + # gcc + CC="$GCC_BASE/bin/gcc" + CXX="$GCC_BASE/bin/g++" + + CFLAGS+=" -B$BINUTILS/gold" + CFLAGS+=" -isystem $GLIBC_INCLUDE" + CFLAGS+=" -isystem $LIBGCC_INCLUDE" + JEMALLOC=1 +else + # clang + CLANG_INCLUDE="$CLANG_LIB/clang/stable/include" + CC="$CLANG_BIN/clang" + CXX="$CLANG_BIN/clang++" + + KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" + + CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib" + CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x " + CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x/x86_64-facebook-linux " + CFLAGS+=" -isystem $GLIBC_INCLUDE" + CFLAGS+=" -isystem $LIBGCC_INCLUDE" + CFLAGS+=" -isystem $CLANG_INCLUDE" + CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " + CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " + CFLAGS+=" -Wno-expansion-to-defined " + CXXFLAGS="-nostdinc++" +fi + +CFLAGS+=" $DEPS_INCLUDE" +CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42" +CXXFLAGS+=" $CFLAGS" + +EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS" +EXEC_LDFLAGS+=" -B$BINUTILS/gold" +EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-5-glibc-2.23/lib/ld.so" +EXEC_LDFLAGS+=" $LIBUNWIND" +EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-5-glibc-2.23/lib" +# required by libtbb +EXEC_LDFLAGS+=" -ldl" + 
+PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" + +EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS" + +VALGRIND_VER="$VALGRIND_BASE/bin/" + +LUA_PATH="$LUA_BASE" + +if test -z $PIC_BUILD; then + LUA_LIB=" $LUA_PATH/lib/liblua.a" +else + LUA_LIB=" $LUA_PATH/lib/liblua_pic.a" +fi + +export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB diff --git a/src/rocksdb/build_tools/fbcode_config4.8.1.sh b/src/rocksdb/build_tools/fbcode_config4.8.1.sh new file mode 100644 index 00000000..c40c1013 --- /dev/null +++ b/src/rocksdb/build_tools/fbcode_config4.8.1.sh @@ -0,0 +1,116 @@ +#!/bin/sh +# +# Set environment variables so that we can compile rocksdb using +# fbcode settings. It uses the latest g++ compiler and also +# uses jemalloc + +BASEDIR=`dirname $BASH_SOURCE` +source "$BASEDIR/dependencies_4.8.1.sh" + +# location of libgcc +LIBGCC_INCLUDE="$LIBGCC_BASE/include" +LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" + +# location of glibc +GLIBC_INCLUDE="$GLIBC_BASE/include" +GLIBC_LIBS=" -L $GLIBC_BASE/lib" + +# location of snappy headers and libraries +SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include" +SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" + +# location of zlib headers and libraries +ZLIB_INCLUDE=" -I $ZLIB_BASE/include" +ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" + +# location of bzip headers and libraries +BZIP2_INCLUDE=" -I $BZIP2_BASE/include/" +BZIP2_LIBS=" $BZIP2_BASE/lib/libbz2.a" + +LZ4_INCLUDE=" -I $LZ4_BASE/include" +LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" + +ZSTD_INCLUDE=" -I $ZSTD_BASE/include" +ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" + +# location of gflags headers and libraries +GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" +GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" + +# location of jemalloc +JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include" +JEMALLOC_LIB="$JEMALLOC_BASE/lib/libjemalloc.a" + +# location of numa 
+NUMA_INCLUDE=" -I $NUMA_BASE/include/" +NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" + +# location of libunwind +LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" + +# location of tbb +TBB_INCLUDE=" -isystem $TBB_BASE/include/" +TBB_LIBS="$TBB_BASE/lib/libtbb.a" + +# use Intel SSE support for checksum calculations +export USE_SSE=1 +export PORTABLE=1 + +BINUTILS="$BINUTILS_BASE/bin" +AR="$BINUTILS/ar" + +DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP2_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE" + +STDLIBS="-L $GCC_BASE/lib64" + +if [ -z "$USE_CLANG" ]; then + # gcc + CC="$GCC_BASE/bin/gcc" + CXX="$GCC_BASE/bin/g++" + + CFLAGS="-B$BINUTILS/gold -m64 -mtune=generic" + CFLAGS+=" -isystem $GLIBC_INCLUDE" + CFLAGS+=" -isystem $LIBGCC_INCLUDE" + JEMALLOC=1 +else + # clang + CLANG_BIN="$CLANG_BASE/bin" + CLANG_LIB="$CLANG_BASE/lib" + CLANG_INCLUDE="$CLANG_LIB/clang/*/include" + CC="$CLANG_BIN/clang" + CXX="$CLANG_BIN/clang++" + + KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include/" + + CFLAGS="-B$BINUTILS/gold -nostdinc -nostdlib" + CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1 " + CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1/x86_64-facebook-linux " + CFLAGS+=" -isystem $GLIBC_INCLUDE" + CFLAGS+=" -isystem $LIBGCC_INCLUDE" + CFLAGS+=" -isystem $CLANG_INCLUDE" + CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " + CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " + CXXFLAGS="-nostdinc++" +fi + +CFLAGS+=" $DEPS_INCLUDE" +CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42" +CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA -DTBB" +CXXFLAGS+=" $CFLAGS" + +EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS" +EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so" +EXEC_LDFLAGS+=" 
$LIBUNWIND" +EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib" +# required by libtbb +EXEC_LDFLAGS+=" -ldl" + +PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" + +EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS" + +VALGRIND_VER="$VALGRIND_BASE/bin/" + +LUA_PATH="$LUA_BASE" + +export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE LUA_PATH diff --git a/src/rocksdb/build_tools/fbcode_config_platform007.sh b/src/rocksdb/build_tools/fbcode_config_platform007.sh new file mode 100644 index 00000000..67d156e4 --- /dev/null +++ b/src/rocksdb/build_tools/fbcode_config_platform007.sh @@ -0,0 +1,157 @@ +#!/bin/sh +# +# Set environment variables so that we can compile rocksdb using +# fbcode settings. It uses the latest g++ and clang compilers and also +# uses jemalloc +# Environment variables that change the behavior of this script: +# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. 
libraries that don't have pic variant will not be included + + +BASEDIR=`dirname $BASH_SOURCE` +source "$BASEDIR/dependencies_platform007.sh" + +CFLAGS="" + +# libgcc +LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0" +LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" + +# glibc +GLIBC_INCLUDE="$GLIBC_BASE/include" +GLIBC_LIBS=" -L $GLIBC_BASE/lib" + +# snappy +SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" +if test -z $PIC_BUILD; then + SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" +else + SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a" +fi +CFLAGS+=" -DSNAPPY" + +if test -z $PIC_BUILD; then + # location of zlib headers and libraries + ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" + ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" + CFLAGS+=" -DZLIB" + + # location of bzip headers and libraries + BZIP_INCLUDE=" -I $BZIP2_BASE/include/" + BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a" + CFLAGS+=" -DBZIP2" + + LZ4_INCLUDE=" -I $LZ4_BASE/include/" + LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" + CFLAGS+=" -DLZ4" +fi + +ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" +if test -z $PIC_BUILD; then + ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" +else + ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a" +fi +CFLAGS+=" -DZSTD" + +# location of gflags headers and libraries +GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" +if test -z $PIC_BUILD; then + GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" +else + GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a" +fi +CFLAGS+=" -DGFLAGS=gflags" + +# location of jemalloc +JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" +JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a" + +if test -z $PIC_BUILD; then + # location of numa + NUMA_INCLUDE=" -I $NUMA_BASE/include/" + NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" + CFLAGS+=" -DNUMA" + + # location of libunwind + LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" +fi + +# location of TBB +TBB_INCLUDE=" -isystem $TBB_BASE/include/" +if test -z $PIC_BUILD; then + TBB_LIBS="$TBB_BASE/lib/libtbb.a" +else + TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a" +fi +CFLAGS+=" -DTBB" + +# use Intel SSE support for checksum 
calculations +export USE_SSE=1 +export PORTABLE=1 + +BINUTILS="$BINUTILS_BASE/bin" +AR="$BINUTILS/ar" + +DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE" + +STDLIBS="-L $GCC_BASE/lib64" + +CLANG_BIN="$CLANG_BASE/bin" +CLANG_LIB="$CLANG_BASE/lib" +CLANG_SRC="$CLANG_BASE/../../src" + +CLANG_ANALYZER="$CLANG_BIN/clang++" +CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build" + +if [ -z "$USE_CLANG" ]; then + # gcc + CC="$GCC_BASE/bin/gcc" + CXX="$GCC_BASE/bin/g++" + + CFLAGS+=" -B$BINUTILS/gold" + CFLAGS+=" -isystem $LIBGCC_INCLUDE" + CFLAGS+=" -isystem $GLIBC_INCLUDE" + JEMALLOC=1 +else + # clang + CLANG_INCLUDE="$CLANG_LIB/clang/stable/include" + CC="$CLANG_BIN/clang" + CXX="$CLANG_BIN/clang++" + + KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" + + CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib" + CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x " + CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux " + CFLAGS+=" -isystem $GLIBC_INCLUDE" + CFLAGS+=" -isystem $LIBGCC_INCLUDE" + CFLAGS+=" -isystem $CLANG_INCLUDE" + CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " + CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " + CFLAGS+=" -Wno-expansion-to-defined " + CXXFLAGS="-nostdinc++" +fi + +CFLAGS+=" $DEPS_INCLUDE" +CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42" +CXXFLAGS+=" $CFLAGS" + +EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS" +EXEC_LDFLAGS+=" -B$BINUTILS/gold" +EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so" +EXEC_LDFLAGS+=" $LIBUNWIND" +EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib" +# required by libtbb +EXEC_LDFLAGS+=" -ldl" + +PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS 
$STDLIBS -lgcc -lstdc++" + +EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS" + +VALGRIND_VER="$VALGRIND_BASE/bin/" + +# lua not supported because it's on track for deprecation, I think +LUA_PATH= +LUA_LIB= + +export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB diff --git a/src/rocksdb/build_tools/format-diff.sh b/src/rocksdb/build_tools/format-diff.sh new file mode 100755 index 00000000..81221ed9 --- /dev/null +++ b/src/rocksdb/build_tools/format-diff.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# If clang_format_diff.py command is not specfied, we assume we are able to +# access directly without any path. +if [ -z $CLANG_FORMAT_DIFF ] +then +CLANG_FORMAT_DIFF="clang-format-diff.py" +fi + +# Check clang-format-diff.py +if ! which $CLANG_FORMAT_DIFF &> /dev/null +then + echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!" + echo "You can download clang-format-diff.py by running: " + echo " curl --location http://goo.gl/iUW1u2 -o ${CLANG_FORMAT_DIFF}" + echo "You can download clang-format by running: " + echo " brew install clang-format" + echo "Then, move both files (i.e. ${CLANG_FORMAT_DIFF} and clang-format) to some directory within PATH=${PATH}" + exit 128 +fi + +# Check argparse, a library that clang-format-diff.py requires. +python 2>/dev/null << EOF +import argparse +EOF + +if [ "$?" != 0 ] +then + echo "To run clang-format-diff.py, we'll need the library "argparse" to be" + echo "installed. You can try either of the follow ways to install it:" + echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse" + echo " 2. easy_install argparse (if you have easy_install)" + echo " 3. 
pip install argparse (if you have pip)" + exit 129 +fi + +# TODO(kailiu) following work is not complete since we still need to figure +# out how to add the modified files done pre-commit hook to git's commit index. +# +# Check if this script has already been added to pre-commit hook. +# Will suggest user to add this script to pre-commit hook if their pre-commit +# is empty. +# PRE_COMMIT_SCRIPT_PATH="`git rev-parse --show-toplevel`/.git/hooks/pre-commit" +# if ! ls $PRE_COMMIT_SCRIPT_PATH &> /dev/null +# then +# echo "Would you like to add this script to pre-commit hook, which will do " +# echo -n "the format check for all the affected lines before you check in (y/n):" +# read add_to_hook +# if [ "$add_to_hook" == "y" ] +# then +# ln -s `git rev-parse --show-toplevel`/build_tools/format-diff.sh $PRE_COMMIT_SCRIPT_PATH +# fi +# fi +set -e + +uncommitted_code=`git diff HEAD` +LAST_MASTER=`git merge-base master HEAD` + +# If there's no uncommitted changes, we assume user are doing post-commit +# format check, in which case we'll check the modified lines since last commit +# from master. Otherwise, we'll check format of the uncommitted code only. +if [ -z "$uncommitted_code" ] +then + # Check the format of last commit + diffs=$(git diff -U0 $LAST_MASTER^ | $CLANG_FORMAT_DIFF -p 1) +else + # Check the format of uncommitted lines, + diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1) +fi + +if [ -z "$diffs" ] +then + echo "Nothing needs to be reformatted!" + exit 0 +fi + +# Highlight the insertion/deletion from the clang-format-diff.py's output +COLOR_END="\033[0m" +COLOR_RED="\033[0;31m" +COLOR_GREEN="\033[0;32m" + +echo -e "Detect lines that doesn't follow the format rules:\r" +# Add the color to the diff. lines added will be green; lines removed will be red. 
+echo "$diffs" | + sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" | + sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/" + +if [[ "$OPT" == *"-DTRAVIS"* ]] +then + exit 1 +fi + +echo -e "Would you like to fix the format automatically (y/n): \c" + +# Make sure under any mode, we can read user input. +exec < /dev/tty +read to_fix + +if [ "$to_fix" != "y" ] +then + exit 1 +fi + +# Do in-place format adjustment. +if [ -z "$uncommitted_code" ] +then + git diff -U0 $LAST_MASTER^ | $CLANG_FORMAT_DIFF -i -p 1 +else + git diff -U0 HEAD^ | $CLANG_FORMAT_DIFF -i -p 1 +fi +echo "Files reformatted!" + +# Amend to last commit if user do the post-commit format check +if [ -z "$uncommitted_code" ]; then + echo -e "Would you like to amend the changes to last commit (`git log HEAD --oneline | head -1`)? (y/n): \c" + read to_amend + + if [ "$to_amend" == "y" ] + then + git commit -a --amend --reuse-message HEAD + echo "Amended to last commit" + fi +fi diff --git a/src/rocksdb/build_tools/gnu_parallel b/src/rocksdb/build_tools/gnu_parallel new file mode 100755 index 00000000..1cf164ff --- /dev/null +++ b/src/rocksdb/build_tools/gnu_parallel @@ -0,0 +1,7936 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and +# Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA + +# open3 used in Job::start +use IPC::Open3; +# &WNOHANG used in reaper +use POSIX qw(:sys_wait_h setsid ceil :errno_h); +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp\n"); + $ENV{HOME} = "/tmp"; +} + +save_stdin_stdout_stderr(); +save_original_signal_handler(); +parse_options(); +::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); +my $number_of_args; +if($Global::max_number_of_args) { + $number_of_args=$Global::max_number_of_args; +} elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; +} else { + $number_of_args = 1; +} + +my @command; +@command = @ARGV; + +my @fhlist; +if($opt::pipepart) { + @fhlist = map { open_or_exit($_) } "/dev/null"; +} else { + @fhlist = map { open_or_exit($_) } @opt::a; + if(not @fhlist and not $opt::pipe) { + @fhlist = (*STDIN); + } +} + +if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $fhlist[0]; + <$fh>; +} +if($opt::header and not $opt::pipe) { + my $fh = $fhlist[0]; + # split with colsep or \t + # $header force $colsep = \t if undef? 
+ my $delimiter = $opt::colsep; + $delimiter ||= "\$"; + my $id = 1; + for my $fh (@fhlist) { + my $line = <$fh>; + chomp($line); + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + # TODO accept configurable short hands + # TODO how to deal with headers in {=...=} + for(@command) { + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } +} else { + my $id = 1; + for my $fh (@fhlist) { + $Global::input_source_header{$id} = $id; + $id++; + } +} + +if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. Remove hosts that are down + filter_hosts(); +} + +if($opt::nonall or $opt::onall) { + onall(@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +# TODO --transfer foo/./bar --cleanup +# multiple --transfer and --basefile with different /./ + +$Global::JobQueue = JobQueue->new( + \@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); + +if($opt::eta or $opt::bar) { + # Count the number of jobs before starting any + $Global::JobQueue->total_jobs(); +} +if($opt::pipepart) { + @Global::cat_partials = map { pipe_part_files($_) } @opt::a; + # Unget the command as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}->unget( + map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials + ); +} +for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); +} + +init_run_jobs(); +my $sem; +if($Global::semaphore) { + $sem = acquire_semaphore(); +} +$SIG{TERM} = \&start_no_new_jobs; + +start_more_jobs(); +if(not $opt::pipepart) { + if($opt::pipe) { + spreadstdin(); + } +} +::debug("init", "Start draining\n"); +drain_job_queue(); +::debug("init", "Done draining\n"); +reaper(); +::debug("init", "Done reaping\n"); +if($opt::pipe and @opt::a) { + for my $job (@Global::tee_jobs) { + 
unlink $job->fh(2,"name"); + $job->set_fh(2,"name",""); + $job->print(); + unlink $job->fh(1,"name"); + } +} +::debug("init", "Cleaning\n"); +cleanup(); +if($Global::semaphore) { + $sem->release(); +} +for(keys %Global::sshmaster) { + kill "TERM", $_; +} +::debug("init", "Halt\n"); +if($opt::halt_on_error) { + wait_and_exit($Global::halt_on_error_exitstatus); +} else { + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +sub __PIPE_MODE__ {} + +sub pipe_part_files { + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + my $header = find_header(\$buf,open_or_exit($file)); + # find positions + my @pos = find_split_positions($file,$opt::blocksize,length $header); + # Make @cat_partials + my @cat_partials = (); + for(my $i=0; $i<$#pos; $i++) { + push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); + } + # Remote exec should look like: + # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) + # ssh -tt not allowed. Remote will die due to broken pipe anyway. 
+ # TODO test remote with --fifo / --cat + return @cat_partials; +} + +sub find_header { + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $opt::blocksize + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { + if($$buf_ref=~s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions { + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $headerlen = length of header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $headerlen) = @_; + my $size = -s $file; + $block = int $block; + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$headerlen); + for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { + my $buf; + seek($fh, $pos, 0) || die; + while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ /(.*$recend)$recstart/os) { + my $i = length($1); + push(@pos,$pos+$i); + # Start looking for next record _after_ this match + $pos += $i; + last; + } + } else { + # If match $recend$recstart => Record position + my $i = index($buf,$recendrecstart); + if($i != -1) { + push(@pos,$pos+$i); + # Start looking for next record _after_ this match + $pos += $i; + last; + } + } + } + } + push(@pos,$size); + close $fh; + return @pos; +} + +sub cat_partial { + 
# Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert start_end to start_len + my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; + return "<". shell_quote_scalar($file) . + q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . + " @start_len"; +} + +sub spreadstdin { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $opt::blocksize + # STDIN + # $opr::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $blocksize = $opt::blocksize; + my $in = *STDIN; + my $header = find_header(\$buf,$in); + while(1) { + my $anything_written = 0; + if(not read($in,substr($buf,length $buf,0),$blocksize)) { + # End-of-file + $chunk_number != 1 and last; + # Force the while-loop once if everything was read by header reading + $one_time_through++ and last; + } + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + next; + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Read n-line records + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex($buf,"\n"); + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex($buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + 
substr($buf,0,$last_newline_pos+1) = ""; + } elsif($opt::regexp) { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } else { + # Find the last recend-recstart in $buf + if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } + } else { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + substr($buf,0,$i) = ""; + } + } else { + # Find the last recend-recstart in $buf + my $i = rindex($buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + substr($buf,0,$i) = ""; + } + } + } + if(not $anything_written and not eof($in)) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially + my $old_blocksize = $blocksize; + $blocksize = ceil($blocksize * 1.3 + 1); + ::warning("A record was longer than $old_blocksize. " . 
+ "Increasing to --blocksize $blocksize\n"); + } + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + substr($buf,0,0) = ""; + write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); + + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + my %incomplete_jobs = %Global::running; + my $sleep = 1; + while(keys %incomplete_jobs) { + my $something_written = 0; + for my $pid (keys %incomplete_jobs) { + my $job = $incomplete_jobs{$pid}; + if($job->stdin_buffer_length()) { + $something_written += $job->non_block_write(); + } else { + delete $incomplete_jobs{$pid} + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + } + + if($opt::regexp) { + # If $recstart/$recend contains '|' this should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = 
index($$buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + + sub round_robin_write { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; + my $something_written = 0; + my $block_passed = 0; + my $sleep = 1; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + # Make a queue to spread the blocks evenly + if(not @robin_queue) { + push @robin_queue, values %Global::running; + } + while(my $job = shift @robin_queue) { + if($job->stdin_buffer_length() > 0) { + $something_written += $job->non_block_write(); + } else { + $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); + $block_passed = 1; + $job->set_virgin(0); + $something_written += $job->non_block_write(); + last; + } + } + $sleep = ::reap_usleep($sleep); + } + return $something_written; + } +} + +sub write_record_to_pipe { + # Fork then + # Write record from pos 0 .. 
$endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $record_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $record_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + # Job is no longer virgin + $job->set_virgin(0); + if(fork()) { + # Skip + } else { + # Chop of at $endpos as we do not know how many rec_sep will + # be removed. + substr($$record_ref,$endpos,length $$record_ref) = ""; + # Remove rec_sep + if($opt::remove_rec_sep) { + Job::remove_rec_sep($record_ref,$recstart,$recend); + } + $job->write($header_ref); + $job->write($record_ref); + close $job->fh(0,"w"); + exit(0); + } + close $job->fh(0,"w"); + return 1; +} + +sub __SEM_MODE__ {} + +sub acquire_semaphore { + # Acquires semaphore. 
If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + # If run in the background, the PID will change + # therefore release and re-acquire the semaphore + $sem->release(); + if(fork()) { + exit(0); + } else { + # child + # Get a semaphore for this pid + ::die_bug("Can't start a new session: $!") if setsid() == -1; + $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); + $sem->acquire(); + } + } + return $sem; +} + +sub __PARSE_OPTIONS__ {} + +sub options_hash { + # Returns: + # %hash = the GetOptions config + return + ("debug|D=s" => \$opt::D, + "xargs" => \$opt::xargs, + "m" => \$opt::m, + "X" => \$opt::X, + "v" => \@opt::v, + "joblog=s" => \$opt::joblog, + "results|result|res=s" => \$opt::results, + "resume" => \$opt::resume, + "resume-failed|resumefailed" => \$opt::resume_failed, + "silent" => \$opt::silent, + #"silent-error|silenterror" => \$opt::silent_error, + "keep-order|keeporder|k" => \$opt::keeporder, + "group" => \$opt::group, + "g" => \$opt::retired, + "ungroup|u" => \$opt::ungroup, + "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, + "tmux" => \$opt::tmux, + "null|0" => \$opt::0, + "quote|q" => \$opt::q, + # Replacement strings + "parens=s" => \$opt::parens, + "rpl=s" => \@opt::rpl, + "plus" => \$opt::plus, + "I=s" => \$opt::I, + "extensionreplace|er=s" => \$opt::U, + "U=s" => \$opt::retired, + "basenamereplace|bnr=s" => \$opt::basenamereplace, + "dirnamereplace|dnr=s" => \$opt::dirnamereplace, + "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, + "seqreplace=s" => \$opt::seqreplace, + "slotreplace=s" => \$opt::slotreplace, + "jobs|j=s" => \$opt::jobs, + "delay=f" => \$opt::delay, + "sshdelay=f" => \$opt::sshdelay, + 
"load=s" => \$opt::load, + "noswap" => \$opt::noswap, + "max-line-length-allowed" => \$opt::max_line_length_allowed, + "number-of-cpus" => \$opt::number_of_cpus, + "number-of-cores" => \$opt::number_of_cores, + "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, + "shellquote|shell_quote|shell-quote" => \$opt::shellquote, + "nice=i" => \$opt::nice, + "timeout=s" => \$opt::timeout, + "tag" => \$opt::tag, + "tagstring|tag-string=s" => \$opt::tagstring, + "onall" => \$opt::onall, + "nonall" => \$opt::nonall, + "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, + "sshlogin|S=s" => \@opt::sshlogin, + "sshloginfile|slf=s" => \@opt::sshloginfile, + "controlmaster|M" => \$opt::controlmaster, + "return=s" => \@opt::return, + "trc=s" => \@opt::trc, + "transfer" => \$opt::transfer, + "cleanup" => \$opt::cleanup, + "basefile|bf=s" => \@opt::basefile, + "B=s" => \$opt::retired, + "ctrlc|ctrl-c" => \$opt::ctrlc, + "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, + "workdir|work-dir|wd=s" => \$opt::workdir, + "W=s" => \$opt::retired, + "tmpdir=s" => \$opt::tmpdir, + "tempdir=s" => \$opt::tmpdir, + "use-compress-program|compress-program=s" => \$opt::compress_program, + "use-decompress-program|decompress-program=s" => \$opt::decompress_program, + "compress" => \$opt::compress, + "tty" => \$opt::tty, + "T" => \$opt::retired, + "halt-on-error|halt=s" => \$opt::halt_on_error, + "H=i" => \$opt::retired, + "retries=i" => \$opt::retries, + "dry-run|dryrun" => \$opt::dryrun, + "progress" => \$opt::progress, + "eta" => \$opt::eta, + "bar" => \$opt::bar, + "arg-sep|argsep=s" => \$opt::arg_sep, + "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, + "trim=s" => \$opt::trim, + "env=s" => \@opt::env, + "recordenv|record-env" => \$opt::record_env, + "plain" => \$opt::plain, + "profile|J=s" => \@opt::profile, + "pipe|spreadstdin" => \$opt::pipe, + "robin|round-robin|roundrobin" => \$opt::roundrobin, + "recstart=s" => \$opt::recstart, + "recend=s" => \$opt::recend, + 
"regexp|regex" => \$opt::regexp, + "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, + "files|output-as-files|outputasfiles" => \$opt::files, + "block|block-size|blocksize=s" => \$opt::blocksize, + "tollef" => \$opt::retired, + "gnu" => \$opt::gnu, + "xapply" => \$opt::xapply, + "bibtex" => \$opt::bibtex, + "nn|nonotice|no-notice" => \$opt::no_notice, + # xargs-compatibility - implemented, man, testsuite + "max-procs|P=s" => \$opt::jobs, + "delimiter|d=s" => \$opt::d, + "max-chars|s=i" => \$opt::max_chars, + "arg-file|a=s" => \@opt::a, + "no-run-if-empty|r" => \$opt::r, + "replace|i:s" => \$opt::i, + "E=s" => \$opt::eof, + "eof|e:s" => \$opt::eof, + "max-args|n=i" => \$opt::max_args, + "max-replace-args|N=i" => \$opt::max_replace_args, + "colsep|col-sep|C=s" => \$opt::colsep, + "help|h" => \$opt::help, + "L=f" => \$opt::L, + "max-lines|l:f" => \$opt::max_lines, + "interactive|p" => \$opt::p, + "verbose|t" => \$opt::verbose, + "version|V" => \$opt::version, + "minversion|min-version=i" => \$opt::minversion, + "show-limits|showlimits" => \$opt::show_limits, + "exit|x" => \$opt::x, + # Semaphore + "semaphore" => \$opt::semaphore, + "semaphoretimeout=i" => \$opt::semaphoretimeout, + "semaphorename|id=s" => \$opt::semaphorename, + "fg" => \$opt::fg, + "bg" => \$opt::bg, + "wait" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + "shebang|hashbang" => \$opt::shebang, + "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, + "Y" => \$opt::retired, + "skip-first-line" => \$opt::skip_first_line, + "header=s" => \$opt::header, + "cat" => \$opt::cat, + "fifo" => \$opt::fifo, + "pipepart|pipe-part" => \$opt::pipepart, + "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, + ); +} + +sub get_options_from_array { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + 
my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_options { + # Returns: N/A + # Defaults: + $Global::version = 20141122; + $Global::progname = 'parallel'; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + $Global::quoting = 0; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', + '{/.}' => 's:.*/::; s:\.[^/.]+$::;', + '{.}' => 's:\.[^/.]+$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$::', + '{+.}' => 's:.*\.::', + '{+..}' => 's:.*\.([^.]*\.):$1:', + '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', + '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', + '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $Global::parens = "{==}"; + $/="\n"; + 
$Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::halt_on_error_exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + + @ARGV=read_options(); + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::0) { $/ = "\0"; } + if(defined $opt::d) { my $e="sprintf \"$opt::d\""; $/ = eval $e; } + if(defined $opt::p) { $Global::interactive = $opt::p; } + if(defined $opt::q) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + # Deal with --rpl + sub rpl { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + if(defined $opt::parens) { $Global::parens = $opt::parens; } + my $parenslen = 0.5*length $Global::parens; + $Global::parensleft = substr($Global::parens,0,$parenslen); + $Global::parensright = substr($Global::parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined 
$opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/ /,$_,2; + $Global::rpl{$shorthand} = $long; + } + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } + if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + if(defined $opt::help) { die_usage(); } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } + if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(defined $opt::show_limits) { show_limits(); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } + $opt::blocksize = multiply_binary_prefix($opt::blocksize); + if(defined $opt::controlmaster) { $opt::noctrlc = 1; } + if(defined $opt::semaphore) { 
$Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg) { $Global::semaphore = 1; } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait) { $Global::semaphore = 1; } + if(defined $opt::halt_on_error and + $opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; } + if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { + ::error("--timeout must be seconds or percentage\n"); + wait_and_exit(255); + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + } + if(defined $opt::nonall) { + # Append a dummy empty argument + push @ARGV, $Global::arg_sep, ""; + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 0; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + $opt::transfer = 1; + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::0 = 1; + $/ = "\0"; + } elsif ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at 
a time (-L, -N) + if(defined $opt::L) { + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + if(defined $opt::tag and not defined $opt::tagstring) { + $opt::tagstring = "\257<\257>"; # Default = {} + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ", + "--max-lines, and -L.\n"); + wait_and_exit(255); + } + if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { + # Deal with ::: and :::: + @ARGV=read_args_from_command_line(); + } + + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if($Global::semaphore) { + # A semaphore does not take input from neither stdin nor file + @opt::a = ("/dev/null"); + push(@Global::unget_argv, [Arg->new("")]); + $Semaphore::timeout = $opt::semaphoretimeout || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". 
+ "background cannot be interactive.\n"); + ::wait_and_exit(255); + } + } + if(defined $opt::eta) { + $opt::progress = $opt::eta; + } + if(defined $opt::bar) { + $opt::progress = $opt::bar; + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.\n"); + ::error("-B has been retired. Use --bf.\n"); + ::error("-T has been retired. Use --tty.\n"); + ::error("-U has been retired. Use --er.\n"); + ::error("-W has been retired. Use --wd.\n"); + ::error("-Y has been retired. Use --shebang.\n"); + ::error("-H has been retired. Use --halt.\n"); + ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n"); + ::wait_and_exit(255); + } + citation_notice(); + + parse_sshlogin(); + parse_env_var(); + + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If opt_N is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail.\n"); + } + + if(not defined $opt::jobs) { + $opt::jobs = "100%"; + } + open_joblog(); +} + +sub env_quote { + # Input: + # $v = value to quote + # Returns: + # $v = value quoted as environment variable + my $v = $_[0]; + $v =~ s/([\\])/\\$1/g; + $v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g; + $v =~ s/\n/"\n"/g; + return $v; +} + +sub record_env { + # Record current %ENV-keys in ~/.parallel/ignored_vars + # Returns: N/A + my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename\n"); + ::wait_and_exit(255); + } +} + +sub parse_env_var { + # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen + # + # Bash functions must be parsed to export them remotely + # Pre-shellshock style bash function: + # myfunc=() {... + # Post-shellshock style bash function: + # BASH_FUNC_myfunc()=() {... 
+ # + # Uses: + # $Global::envvar = eval string that will set variables in both bash and csh + # $Global::envwarn = If functions are used: Give warning in csh + # $Global::envvarlen = length of $Global::envvar + # @opt::env + # $Global::shell + # %ENV + # Returns: N/A + $Global::envvar = ""; + $Global::envwarn = ""; + my @vars = ('parallel_bash_environment'); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + if(grep { /^_$/ } @vars) { + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' in a clean environment first.\n"); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions. + # So --env myfunc should also look for BASH_FUNC_myfunc() + @vars = map { $_, "BASH_FUNC_$_()" } @vars; + # Keep only defined variables + @vars = grep { defined($ENV{$_}) } @vars; + # Pre-shellshock style bash function: + # myfunc=() { echo myfunc + # } + # Post-shellshock style bash function: + # BASH_FUNC_myfunc()=() { echo myfunc + # } + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; + if(@bash_functions) { + # Functions are not supported for all shells + if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell\n"); + } + } + + # Pre-shellschock names are without () + my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions; + # Post-shellschock names are with () + my @bash_post_shellshock = grep { /\(\)/ } @bash_functions; + + my @qcsh = (map { my $a=$_; "setenv $a " . 
env_quote($ENV{$a}) } + grep { not /^parallel_bash_environment$/ } @non_functions); + my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) } + @non_functions, @bash_pre_shellshock); + + push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock; + push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock; + + #ssh -tt -oLogLevel=quiet lo 'eval `echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ BASH_FUNC_myfunc\ \\\(\\\)\\\ \\\{\\\ \\\ echo\\\ a\"' + #'\"\\\}\ \|\|\ myfunc\(\)\ \{\ \ echo\ a' + #'\}\ \;myfunc\ 1; + + # Check if any variables contain \n + if(my @v = map { s/BASH_FUNC_(.*)\(\)/$1/; $_ } grep { $ENV{$_}=~/\n/ } @vars) { + # \n is bad for csh and will cause it to fail. + $Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | egrep "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn; + } + + if(not @qcsh) { push @qcsh, "true"; } + if(not @qbash) { push @qbash, "true"; } + # Create lines like: + # echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2" + if(@vars) { + $Global::envvar .= + join"", + (q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && } + . join(" && ", @qcsh) + . q{ || } + . 
join(" && ", @qbash) + .q{;}); + if($ENV{'parallel_bash_environment'}) { + $Global::envvar .= 'eval "$parallel_bash_environment";'."\n"; + } + } + $Global::envvarlen = length $Global::envvar; +} + +sub open_joblog { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fd + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results.\n"); + ::wait_and_exit(255); + } + if($opt::joblog) { + if($opt::resume || $opt::resume_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + $append = <$joblog_fh>; # If there is a header: Open as append later + my $joblog_regexp; + if($opt::resume_failed) { + # Make a regexp that only matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + } elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + close $joblog_fh; + } + } + if($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fd{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). 
"\n"; + } + } +} + +sub find_compression_program { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed + my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + + +sub read_options { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = shell_quote_scalar(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does. 
+ my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = shell_quote_scalar(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; + } + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from .parallel/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + $ENV{'HOME'}."/.parallel/config", + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if(-r $profile) { + push @profiles, $profile; + } else { + push @profiles, $ENV{'HOME'}."/.parallel/".$profile; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shellwords($_); + } + close $in_fh; + } else { + if(grep /^$profile$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable.\n"); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + @ARGV_env = shellwords($ENV{'PARALLEL'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + + # Prepend 
non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub read_args_from_command_line { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::internal_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my @new_argv = (); + for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_file_sep) { + my $group = $arg; # This group of arguments is args or argfiles + my @group; + while(defined ($arg = shift @ARGV)) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_file_sep) { + # exit while loop if finding new separator + last; + } else { + # If not hitting ::: or :::: + # Append it to the group + push @group, $arg; + } + } + + if($group eq $Global::arg_file_sep + or ($opt::internal_pipe_means_argfiles and $opt::pipe) + ) { + # Group of file names on the command line. + # Append args into -a + push @opt::a, @group; + } elsif($group eq $Global::arg_sep) { + # Group of arguments on the command line. + # Put them into a file. 
+ # Create argfile + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + # Append filehandle to -a + push @opt::a, $outfh; + } else { + ::die_bug("Unknown command line group: $group"); + } + if(defined($arg)) { + # $arg is ::: or :::: + redo; + } else { + # $arg is undef -> @ARGV empty + last; + } + } + push @new_argv, $arg; + } + # Output: @ARGV = command to run with options + return @new_argv; +} + +sub cleanup { + # Returns: N/A + if(@opt::basefile) { cleanup_basefile(); } +} + +sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} + +sub shell_quote { + # Input: + # @strings = strings to be quoted + # Output: + # @shell_quoted_strings = string quoted with \ as needed by the shell + my @strings = (@_); + for my $a (@strings) { + $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' + } + return wantarray ? @strings : "@strings"; +} + +sub shell_quote_empty { + # Inputs: + # @strings = strings to be quoted + # Returns: + # @quoted_strings = empty strings quoted as ''. + my @strings = shell_quote(@_); + for my $a (@strings) { + if($a eq "") { + $a = "''"; + } + } + return wantarray ? 
@strings : "@strings"; +} + +sub shell_quote_scalar { + # Quote the string so shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted with \ as needed by the shell + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go; + $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \' + } + return $a; +} + +sub shell_quote_file { + # Quote the string so shell will not expand any special chars and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed + my $a = shell_quote_scalar(shift); + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return $a; +} + +sub shellwords { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + + +sub __FILEHANDLES__ {} + + +sub save_stdin_stdout_stderr { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 
3>/tmp/foo) + # Uses: + # %Global::fd + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # Find file descriptors that are already opened (by the shell) + for my $fdno (1..61) { + # /dev/fd/62 and above are used by bash for <(cmd) + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fd{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fd + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? 
+ # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fd)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Uses: + # $Global::stdin_in_opt_a + # $Global::original_stdin + # Returns: + # $fh = file handle to read-opened file + my $file = shift; + if($file eq "-") { + $Global::stdin_in_opt_a = 1; + return ($Global::original_stdin || *STDIN); + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, "<", $file)) { + ::error("Cannot open input file `$file': No such file or directory.\n"); + wait_and_exit(255); + } + return $fh; +} + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {} + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started + +sub init_run_jobs { + $Global::total_running = 0; + $Global::total_started = 0; + $Global::tty_taken = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + +sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # $Global::max_procs_file + # $Global::max_procs_file_last_mod + # %Global::host + # 
@opt::sshloginfile + # $Global::start_no_new_jobs + # $opt::filter_hosts + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + my $jobs_started_this_round = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + if($mtime > $Global::max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $Global::max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run","--sshloginfile $actual_file changed. 
reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + do { + $jobs_started_this_round = 0; + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since + next; + } + if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { + # It has been too short since last start + next; + } + debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. 
Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + $jobs_started_this_round++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + } while($jobs_started_this_round); + + return $jobs_started; +} +} + +{ + my $no_more_file_handles_warned; + +sub start_another_job { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? 
+ if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume and $job->is_already_in_results())); + debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + push(@Global::virgin_jobs,$job); + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + ::error("No more processes: cannot run a single job. Something is wrong.\n"); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ", + "Decreasing number of running jobs to $max. ", + "Raising ulimit -u or /etc/security/limits.conf may help.\n"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. 
", + "Raising ulimit -n or /etc/security/limits.conf may help.\n"); + return 0; + } +} +} + +sub init_progress { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my %progress = progress(); + return ("\nComputers / CPU cores / Max jobs to run\n", + $progress{'workerlist'}); +} + +sub drain_job_queue { + # Uses: + # $opt::progress + # $Global::original_stderr + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + if($opt::progress) { + print $Global::original_stderr init_progress(); + } + my $last_header=""; + my $sleep = 0.2; + do { + while($Global::total_running > 0) { + debug($Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + print $Global::original_stderr "\n", $progress{'header'}, "\n"; + $last_header = $progress{'header'}; + } + print $Global::original_stderr "\r",$progress{'status'}; + flush $Global::original_stderr; + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. 
+ if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Sometimes SIGCHLD is not registered, so force reaper + $sleep = ::reap_usleep($sleep); + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on.\n"); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + start_more_jobs(); + $sleep = ::reap_usleep($sleep); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs.\n"); + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); + if($opt::progress) { + my %progress = progress(); + print $Global::original_stderr "\r", $progress{'status'}, "\n"; + flush $Global::original_stderr; + } +} + +sub toggle_progress { + # Turn on/off progress view + # Uses: + # $opt::progress + # $Global::original_stderr + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + print $Global::original_stderr init_progress(); + } +} + +sub progress { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? 
($_=>"local") : ($_=>$_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $workerno{$w}.":".$sshlogin{$w} ." / ". + ($Global::host{$w}->ncpus() || "-")." / ". + $Global::host{$w}->max_jobs_running()."\n"; + } + $status = "x"x($termcols+1); + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $sshlogin{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . 
+ join(" ",map + { sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $workerno{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . 
+ join(" ",map + { sprintf("%s:%d", + $sshlogin{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d", + $workerno{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, "status" => $status); +} + +{ + my ($total, $first_completed, $smoothed_avg_time); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + $total ||= $Global::JobQueue->total_jobs(); + my $completed = 0; + for(values %Global::host) { $completed += $_->jobs_completed() } + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = $completed / $total; + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? 
+ $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; + # These chars mess up display in the terminal + $arg =~ tr/[\011-\016\033\302-\365]//d; + my $bar_text = + sprintf("%d%% %d:%d=%ds %s", + $pctcomplete*100, $completed, $left, $eta, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($columns,$last_column_time); + + sub terminal_columns { + # Get the number of columns of the display + # Returns: + # number of columns of the screen + if(not $columns or $last_column_time < time) { + $last_column_time = time; + $columns = $ENV{'COLUMNS'}; + if(not $columns) { + my $resize = qx{ resize 2>/dev/null }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; + } + $columns ||= 80; + } + return $columns; + } +} + +sub get_job_with_sshlogin { + # Returns: + # next job object for $sshlogin if any available + my $sshlogin = shift; + my $job = undef; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job for this hostgroup + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + + my $clean_command = $job->replaced(); + if($clean_command =~ /^\s*$/) { + # Do not run empty 
lines + if(not $Global::JobQueue->empty()) { + return get_job_with_sshlogin($sshlogin); + } else { + return undef; + } + } + $job->set_sshlogin($sshlogin); + if($opt::retries and $clean_command and + $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + +sub __REMOTE_SSH__ {} + +sub read_sshloginfiles { + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand { + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $ENV{'HOME'}."/.parallel/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + if(not -r $ENV{'HOME'}."/.parallel/".$file) { + # Try prepending ~/.parallel + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } else { + $file = $ENV{'HOME'}."/.parallel/".$file; + } + } + return $file; +} + +sub read_sshloginfile { + # Returns: N/A + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + if(not open($in_fh, "<", $file)) { + # Try the filename + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + 
/^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub parse_sshlogin { + # Returns: N/A + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + for my $s (split /,/, $sshlogin) { + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + push (@login, $s); + } + } + } + $Global::minimal_command_line_length = 8_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + if($sshlogin_string eq ":") { + $sshlogin->set_maxlength(Limits::Command::max_length()); + } else { + # If all chars needs to be quoted, every other character will be \ + $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); + } + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", 
my_dump(%Global::host),"\n"); + if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::cleanup) { + ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there are no remote --sshlogin.\n"); + } + } + } +} + +sub remote_hosts { + # Return sshlogins that are not ':' + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Returns: N/A + my $cmd = ""; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles\n"); + ::wait_and_exit(255); + } + $workdir ||= Job->new("")->workdir(); + $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . 
"&"; + } + } + $cmd .= "wait;"; + debug("init", "basesetup: $cmd\n"); + print `$cmd`; +} + +sub cleanup_basefile { + # Remove the basefiles transferred + # Returns: N/A + my $cmd=""; + my $workdir = Job->new("")->workdir(); + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; + } + } + $cmd .= "wait;"; + debug("init", "basecleanup: $cmd\n"); + print `$cmd`; +} + +sub filter_hosts { + my(@cores, @cpus, @maxline, @echo); + my $envvar = ::shell_quote_scalar($Global::envvar); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); + push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); + push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); + push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); + # 'echo' is used to get the best possible value for an ssh login time + push(@echo, $host."\t".$sshcmd." echo\n\0"); + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); + print $fh @cores, @cpus, @maxline, @echo; + close $fh; + # --timeout 5: Setting up an SSH connection and running a simple + # command should never take > 5 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. 
+ # --retries 3: If the ssh daemon it overloaded, try 3 times + # -s 16000: Half of the max line on UnixWare + my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; + ::debug("init", $cmd, "\n"); + open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); + my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); + my $prepend = ""; + while(<$host_fh>) { + if(/\'$/) { + # if last char = ' then append next line + # This may be due to quoting of $Global::envvar + $prepend .= $_; + next; + } + $_ = $prepend . $_; + $prepend = ""; + chomp; + my @col = split /\t/, $_; + if(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[7] eq "15") { + # exit == 255 or signal == 15: ssh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + @down_hosts = uniq(@down_hosts); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set ncpus and ncores = 1 + ::warning("Could not figure out ", + "number of cpus on $host. 
Using 1.\n"); + $ncores{$host} = 1; + $ncpus{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(not $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not $ncpus{$col[0]}) { + $ncpus{$col[0]} = $col[1]; + } elsif(not $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { + # Skip these: + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). 
+ } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + close $host_fh; + $Global::debug or unlink $tmpfile; + delete @Global::host{@down_hosts}; + @down_hosts and ::warning("Removed @down_hosts\n"); + $Global::minimal_command_line_length = 8_000_000; + while (my ($sshlogin, $obj) = each %Global::host) { + if($sshlogin eq ":") { next } + $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); + $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); + $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); + $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); + if($opt::use_cpus_instead_of_cores) { + $obj->set_ncpus($ncpus{$sshlogin}); + } else { + $obj->set_ncpus($ncores{$sshlogin}); + } + $obj->set_time_to_login($time_to_login{$sshlogin}); + $obj->set_maxlength($maxlen{$sshlogin}); + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, + int($maxlen{$sshlogin}/2)); + ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin}, + " ncores:", $ncores{$sshlogin}, + " time_to_login:", $time_to_login{$sshlogin}, + " maxlen:", $maxlen{$sshlogin}, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub onall { + sub tmp_joblog { + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my @command = @_; + if($Global::quoting) { + @command = shell_quote_empty(@command); + } + + # Copy all @fhlist into tempfiles + my @argfiles = (); + for my $fh (@fhlist) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to 
the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((@opt::v) ? "-vv" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + ::debug("init", "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? 
+ "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + +sub __SIGNAL_HANDLING__ {} + +sub save_original_signal_handler { + # Remember the original signal handler + # Returns: N/A + $SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X + $SIG{INT} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } + unlink keys %Global::unlink; exit -1 }; + $SIG{TERM} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } + unlink keys %Global::unlink; exit -1 }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start +} + +sub list_running_jobs { + # Returns: N/A + for my $v (values %Global::running) { + print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n"; + } +} + +sub start_no_new_jobs { + # Returns: N/A + $SIG{TERM} = $Global::original_sig{TERM}; + print $Global::original_stderr + ("$Global::progname: SIGTERM received. No new jobs will be started.\n", + "$Global::progname: Waiting for these ", scalar(keys %Global::running), + " jobs to finish. Send SIGTERM again to stop now.\n"); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reaper { + # A job finished. + # Print the output. 
+ # Start another job + # Returns: N/A + my $stiff; + my $children_reaped = 0; + debug("run", "Reaper "); + while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + $children_reaped++; + if($Global::sshmaster{$stiff}) { + # This is one of the ssh -M: ignore + next; + } + my $job = $Global::running{$stiff}; + # '-a <(seq 10)' will give us a pid not in %Global::running + $job or next; + $job->set_exitstatus($? >> 8); + $job->set_exitsignal($? & 127); + debug("run", "died (", $job->exitstatus(), "): ", $job->seq()); + $job->set_endtime(::now()); + if($stiff == $Global::tty_taken) { + # The process that died had the tty => release it + $Global::tty_taken = 0; + } + + if(not $job->should_be_retried()) { + # The job is done + # Free the jobslot + push @Global::slots, $job->slot(); + if($opt::timeout) { + # Update average runtime for timeout + $Global::timeoutq->update_delta_time($job->runtime()); + } + # Force printing now if the job failed and we are going to exit + my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2 + and $job->exitstatus()); + if($opt::keeporder and not $print_now) { + print_earlier_jobs($job); + } else { + $job->print(); + } + if($job->exitstatus()) { + process_failed_job($job); + } + + } + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + $sshlogin->inc_jobs_completed(); + $Global::total_running--; + delete $Global::running{$stiff}; + start_more_jobs(); + } + debug("run", "done "); + return $children_reaped; +} + +sub process_failed_job { + # The jobs had a exit status <> 0, so error + # Returns: N/A + my $job = shift; + $Global::exitstatus++; + $Global::total_failed++; + if($opt::halt_on_error) { + if($opt::halt_on_error == 1 + or + ($opt::halt_on_error < 1 and $Global::total_failed > 3 + and + $Global::total_failed / $Global::total_started > $opt::halt_on_error)) { + # If halt on error == 1 or --halt 10% + # we should gracefully exit + print $Global::original_stderr + ("$Global::progname: Starting no more jobs. 
", + "Waiting for ", scalar(keys %Global::running), + " jobs to finish. This job failed:\n", + $job->replaced(),"\n"); + $Global::start_no_new_jobs ||= 1; + $Global::halt_on_error_exitstatus = $job->exitstatus(); + } elsif($opt::halt_on_error == 2) { + # If halt on error == 2 we should exit immediately + print $Global::original_stderr + ("$Global::progname: This job failed:\n", + $job->replaced(),"\n"); + exit ($job->exitstatus()); + } + } +} + +{ + my (%print_later,$job_end_sequence); + + sub print_earlier_jobs { + # Print jobs completed earlier + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_end_sequence ||= 1; + debug("run", "Looking for: $job_end_sequence ", + "Current: ", $job->seq(), "\n"); + for(my $j = $print_later{$job_end_sequence}; + $j or vec($Global::job_already_run,$job_end_sequence,1); + $job_end_sequence++, + $j = $print_later{$job_end_sequence}) { + debug("run", "Found job end $job_end_sequence"); + if($j) { + $j->print(); + delete $print_later{$job_end_sequence}; + } + } + } +} + +sub __USAGE__ {} + +sub wait_and_exit { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + if($error) { + # Kill all without printing + for my $job (values %Global::running) { + $job->kill("TERM"); + $job->kill("TERM"); + } + } + for (keys %Global::unkilled_children) { + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + wait(); + exit($error); +} + +sub die_usage { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... 
| $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "See 'man $Global::progname' for details", + "", + "When using programs that use GNU Parallel to process data for publication please cite:", + "", + "O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", + ";login: The USENIX Magazine, February 2011:42-47.", + "", + "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", + ""); +} + + +sub citation_notice { + # if --no-notice or --plain: do nothing + # if stderr redirected: do nothing + # if ~/.parallel/will-cite: do nothing + # else: print citation notice to stderr + if($opt::no_notice + or + $opt::plain + or + not -t $Global::original_stderr + or + -e $ENV{'HOME'}."/.parallel/will-cite") { + # skip + } else { + print $Global::original_stderr + ("When using programs that use GNU Parallel to process data for publication please cite:\n", + "\n", + " O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", + " ;login: The USENIX Magazine, February 2011:42-47.\n", + "\n", + "This helps funding further development; and it won't cost you a cent.\n", + "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", + "\n", + "To silence this citation notice run 'parallel --bibtex' once or use '--no-notice'.\n\n", + ); + flush $Global::original_stderr; + } +} + + +sub warning { + my @w = @_; + my $fh = $Global::original_stderr || *STDERR; + my $prog = $Global::progname || "parallel"; + print $fh $prog, ": Warning: ", @w; +} + + +sub error { + my @w = @_; + my $fh = $Global::original_stderr || *STDERR; + my $prog = $Global::progname || "parallel"; + print $fh $prog, ": Error: ", @w; +} + + +sub die_bug { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug.\n", + "Please contact <parallel\@gnu.org> and include:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + ::wait_and_exit(255); +} + +sub version { + # Returns: N/A + if($opt::tollef and not $opt::gnu) { + print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; + } + print join("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and Free Software Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: http://www.gnu.org/software/${Global::progname}\n", + "When using programs that use GNU Parallel to process data for publication please cite:\n", + "O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool, ", + ";login: The USENIX Magazine, February 2011:42-47.\n", + "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", + ); +} + +sub bibtex { + # Returns: N/A + if($opt::tollef and not $opt::gnu) { + print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; + } + print join("\n", + "When using programs that use GNU Parallel to process data for publication please cite:", + "", + "\@article{Tange2011a,", + " title = {GNU Parallel - The Command-Line Power Tool},", + " author = {O. Tange},", + " address = {Frederiksberg, Denmark},", + " journal = {;login: The USENIX Magazine},", + " month = {Feb},", + " number = {1},", + " volume = {36},", + " url = {http://www.gnu.org/s/parallel},", + " year = {2011},", + " pages = {42-47}", + "}", + "", + "(Feel free to use \\nocite{Tange2011a})", + "", + "This helps funding further development.", + "", + "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", + "" + ); + while(not -e $ENV{'HOME'}."/.parallel/will-cite") { + print "\nType: 'will cite' and press enter.\n> "; + my $input = <STDIN>; + if($input =~ /will cite/i) { + mkdir $ENV{'HOME'}."/.parallel"; + open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite") + || ::die_bug("Cannot write: ".$ENV{'HOME'}."/.parallel/will-cite"); + close $fh; + print "\nThank you for your support. It is much appreciated. 
The citation\n", + "notice is now silenced.\n"; + } + } +} + +sub show_limits { + # Returns: N/A + print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", + "Maximal used size of command: ",Limits::Command::max_length(),"\n", + "\n", + "Execution of will continue now, and it will try to read its input\n", + "and run commands; if this is not what you wanted to happen, please\n", + "press CTRL-D or CTRL-C\n"); +} + +sub __GENERIC_COMMON_FUNCTION__ {} + +sub uniq { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty { + my $a = shift; + return $a ? $a : ""; +} + +{ + my $hostname; + sub hostname { + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. 
Nothing if not found + my @which; + for my $prg (@_) { + push @which, map { $_."/".$prg } grep { -x $_."/".$prg } split(":",$ENV{'PATH'}); + } + return @which; +} + +{ + my ($regexp,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # posh rbash rush rzsh sash sh static-sh tcsh yash zsh + my @shells = qw(ash bash csh dash fdsh fish fizsh ksh + ksh93 mksh pdksh posh rbash rush rzsh + sash sh static-sh tcsh yash zsh -sh -csh); + # Can be formatted as: + # [sh] -sh sh busybox sh + # /bin/sh /sbin/sh /opt/csw/sh + # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh + my $shell = "(?:".join("|",@shells).")"; + $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| )'; + %fakename = ( + # csh and tcsh disguise themselves as -sh/-csh + "-sh" => ["csh", "tcsh"], + "-csh" => ["tcsh", "csh"], + ); + } + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); + if($name_of_ref->{$testpid} =~ /$regexp/o) { + ::debug("init", "which ".($3||$6)." => "); + $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). 
+ q(s/^.{$s}//; print "@F[1,2] $_"' ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'cygwin' => $sysv, + 'msys' => $sysv, + 'dec_osf' => $sysv, + 'darwin' => $bsd, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + ); + } + $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + /(\S+)\s+(\S+)\s+(\S+.*)/ or ::die_bug("pidtable format: $_"); + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub reap_usleep { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reaper()) { + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + usleep($ms); + Job::exit_if_disk_full(); + if($opt::linebuffer) { + for my $job (values %Global::running) { + $job->print(); + } + } + # Sleep exponentially longer (1.1^n) if a job did not finish + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub usleep { + # Sleep this many milliseconds. 
+ # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug(int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub now { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub multiply_binary_prefix { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my $s = shift; + $s =~ s/ki/*1024/gi; + $s =~ s/mi/*1024*1024/gi; + $s =~ s/gi/*1024*1024*1024/gi; + $s =~ s/ti/*1024*1024*1024*1024/gi; + $s =~ s/pi/*1024*1024*1024*1024*1024/gi; + $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + $s =~ s/K/*1024/g; + $s =~ s/M/*1024*1024/g; + $s =~ s/G/*1024*1024*1024/g; + $s =~ s/T/*1024*1024*1024*1024/g; + $s =~ s/P/*1024*1024*1024*1024*1024/g; + $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + $s =~ s/k/*1000/g; + $s =~ s/m/*1000*1000/g; + $s =~ s/g/*1000*1000*1000/g; + $s =~ s/t/*1000*1000*1000*1000/g; + $s =~ 
s/p/*1000*1000*1000*1000*1000/g; + $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; + $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $s = eval $s; + ::debug($s); + return $s; +} + +sub tmpfile { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filename = file name created + return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); +} + +sub __DEBUGGING__ {} + +sub debug { + # Uses: + # $Global::debug + # %Global::fd + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fd{1}) { + # Original stdout was saved + my $stdout = $Global::fd{1}; + print $stdout @_[1..$#_]; + } else { + print @_[1..$#_]; + } + } +} + +sub my_memory_usage { + # Returns: + # memory usage if found + # 0 otherwise + use strict; + use FileHandle; + + my $pid = $$; + if(-e "/proc/$pid/stat") { + my $fh = FileHandle->new("</proc/$pid/stat"); + + my $data = <$fh>; + chomp $data; + $fh->close; + + my @procinfo = split(/\s+/,$data); + + return undef_as_zero($procinfo[22]); + } else { + return 0; + } +} + +sub my_size { + # Returns: + # $size = size of object if Devel::Size is installed + # -1 otherwise + my @size_this = (@_); + eval "use Devel::Size qw(size total_size)"; + if ($@) { + return -1; + } else { + return total_size(@_); + } +} + +sub my_dump { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". 
+ "Not dumping output\n"; + print $Global::original_stderr $err; + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + +sub __OBJECT_ORIENTED_PARTS__ {} + +package SSHLogin; + +sub new { + my $class = shift; + my $sshlogin_string = shift; + my $ncpus; + my %hostgroups; + # SSHLogins can have these formats: + # @grp+grp/ncpu//usr/bin/ssh user@server + # ncpu//usr/bin/ssh user@server + # /usr/bin/ssh user@server + # user@server + # ncpu/user@server + # @grp+grp/user@server + if($sshlogin_string =~ s:^\@([^/]+)/?::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+/, $1); + } + if ($sshlogin_string =~ s:^(\d+)/::) { + # Override default autodetected ncpus unless missing + $ncpus = $1; + } + my $string = $sshlogin_string; + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + my @unget = (); + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'hostgroups' => \%hostgroups, + 'sshcommand' => undef, + 'serverlogin' => undef, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" . + $no_slash_string, + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . 
+ $no_slash_string, + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY { + my $self = shift; + # Remove temporary files if they are created. + unlink $self->{'loadavg_file'}; + unlink $self->{'swap_activity_file'}; +} + +sub string { + my $self = shift; + return $self->{'string'}; +} + +sub jobs_running { + my $self = shift; + + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed { + my $self = shift; + $self->{'jobs_completed'}++; +} + +sub set_max_jobs_running { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub swapping { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap 
activity computed + my $self = shift; + # Should we update the swap_activity file? + my $update_swap_activity_file = 0; + if(-r $self->{'swap_activity_file'}) { + open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + my $swap_activity; + $swap_activity = swapactivityscript(); + if($self->{'string'} ne ":") { + $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . + ::shell_quote_scalar($swap_activity); + } + # Run swap_activity measuring. 
+ # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in 
sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + 
# 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + for my $os (keys %vmstat) { + #q[ { vmstat 1 2 2> /dev/null || vmstat -c 1 2; } | ]. + # q[ awk 'NR!=4{next} NF==17||NF==16{print $7*$8} NF==22{print $21*$22} {exit}' ]; + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); + $script = $Global::envvar. " " .$perlscript; + } + return $script; + } +} + +sub too_fast_remote_login { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? 
$too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high { + my $self = shift; + my $loadavg = $self->loadavg(); + return (not defined $loadavg or + $loadavg > $self->max_loadavg()); +} + +sub loadavg { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting for disk + # or CPU right now. So it is the server load this instant and not averaged over + # several minutes. This is needed so GNU Parallel will at most start one job + # that will push the load over the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + my $self = shift; + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/ = undef; + my $load_out = <$load_fh>; + close $load_fh; + my $load =()= ($load_out=~/(^[DR]....[^\[])/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'}); + } else { + ::die_bug("loadavg_invalid_content: $load_out"); + } + ::debug("load", "Last update: ", $self->{'last_loadavg_update'}); + if(time - $self->{'last_loadavg_update'} > 10) { + # last loadavg was started 10 seconds ago + ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ", + $self->{'loadavg_file'}); + $update_loadavg_file = 1; + } + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + -e 
$ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->sshcommand() . " " . $self->serverlogin() . " "; + } + # TODO Is is called 'ps ax -o state,command' on other platforms? + $cmd .= "ps ax -o state,command"; + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'loadavg_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); + qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; + } + return $self->{'loadavg'}; +} + +sub max_loadavg { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. --load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. 
--load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + if(open(my $in_fh, "<", $Global::max_load_file)) { + my $opt_load_file = join("",<$in_fh>); + close $in_fh; + $load = $self->compute_max_loadavg($opt_load_file); + } else { + print $Global::original_stderr "Cannot open $loadspec\n"; + ::wait_and_exit(255); + } + } else { + print $Global::original_stderr "Parsing of --load failed\n"; + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +sub processes_available_by_system_limit { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + 
# Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + + my $system_limit = 0; + my @jobs = (); + my $job; + my @args = (); + my $arg; + my $more_filehandles = 1; + my $max_system_proc_reached = 0; + my $slow_spawining_warning_printed = 0; + my $time = time; + my %fh; + my @children; + + # Reserve filehandles + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + for my $i (1..12) { + open($fh{"init-$i"}, "<", "/dev/null"); + } + + for(1..2) { + # System process limit + my $child; + if($child = fork()) { + push (@children,$child); + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # The child takes one process slot + # It will be killed later + $SIG{TERM} = $Global::original_sig{TERM}; + sleep 10000000; + exit(0); + } else { + $max_system_proc_reached = 1; + } + } + my $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + my $before_getting_arg = time; + if($Global::semaphore or $opt::pipe) { + # Skip: No need to get args + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + last; + } else { + ($job) = $Global::JobQueue->get(); + push(@jobs, $job); + } + } else { + ($arg) = 
$Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need to go further + $Global::JobQueue->empty() and last; + ($job) = $Global::JobQueue->get(); + push(@jobs, $job); + } + } + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles when grouping + # Every simultaneous process uses 2 filehandles when compressing + $more_filehandles = open($fh{$system_limit*10}, "<", "/dev/null") + && open($fh{$system_limit*10+2}, "<", "/dev/null") + && open($fh{$system_limit*10+3}, "<", "/dev/null") + && open($fh{$system_limit*10+4}, "<", "/dev/null"); + + # System process limit + my $child; + if($child = fork()) { + push (@children,$child); + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # The child takes one process slot + # It will be killed later + $SIG{TERM} = $Global::original_sig{TERM}; + sleep 10000000; + exit(0); + } else { + $max_system_proc_reached = 1; + } + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", + $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01 + and not $slow_spawining_warning_printed) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + print $Global::original_stderr + ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n", + "Consider adjusting -j. 
Press CTRL-C to stop.\n"); + $slow_spawining_warning_printed = 1; + } + } + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); + $Global::JobQueue->unget(@jobs); + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n", + "or /proc/sys/kernel/pid_max may help.\n"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n", + "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ", + "raising ulimit -n or /etc/security/limits.conf may help.\n"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ", $system_limit, + " jobs in parallel. 
Raising ulimit -u or /etc/security/limits.conf\n", + "or /proc/sys/kernel/pid_max may help.\n"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; +} + +sub simultaneous_sshlogin_limit { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->serverlogin(); + ::warning("ssh to $serverlogin only allows ", + "for $ssh_limit simultaneous logins.\n", + "You may raise this by changing ", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n", + "Using only ",$ssh_limit-1," connections ", + "to avoid race conditions.\n"); + } + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + return $ssh_limit; +} + +sub simultaneous_sshlogin { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l + # Returns: + # Number of succesful logins + my $self = shift; + my $wanted_processes = shift; + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + my $sshdelay = $opt::sshdelay ? 
"sleep $opt::sshdelay;" : ""; + my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin </dev/null 2>&1 &"x$wanted_processes; + ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes { + # Parse the number of processes that the user asked for using -j + # Returns: + # the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if($opt_P =~ /^\+(\d+)$/) { + # E.g. -P +2 + my $j = $1; + $processes = + $self->ncpus() + $j; + } elsif ($opt_P =~ /^-(\d+)$/) { + # E.g. -P -2 + my $j = $1; + $processes = + $self->ncpus() - $j; + } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { + # E.g. -P 10.5% + my $j = $1; + $processes = + $self->ncpus() * $j / 100; + } elsif ($opt_P =~ /^(\d+)$/) { + $processes = $1; + if($processes == 0) { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } + } elsif (-f $opt_P) { + $Global::max_procs_file = $opt_P; + $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9]; + if(open(my $in_fh, "<", $Global::max_procs_file)) { + my $opt_P_file = join("",<$in_fh>); + close $in_fh; + $processes = $self->user_requested_processes($opt_P_file); + } else { + ::error("Cannot open $opt_P.\n"); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n"); + ::die_usage(); + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus { + my $self = shift; + if(not defined $self->{'ncpus'}) { + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + if($serverlogin eq ":") { + if($opt::use_cpus_instead_of_cores) { + $self->{'ncpus'} = no_of_cpus(); + } else { + 
$self->{'ncpus'} = no_of_cores(); + } + } else { + my $ncpu; + my $sqe = ::shell_quote_scalar($Global::envvar); + if($opt::use_cpus_instead_of_cores) { + $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus); + } else { + ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n)); + $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ", + "number of cpus on $serverlogin ($ncpu). Using 1.\n"); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + +sub no_of_cpus { + # Returns: + # Number of physical CPUs + local $/="\n"; # If delimiter is set, then $/ will be wrong + my $no_of_cpus; + if ($^O eq 'linux') { + $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); + } elsif ($^O eq 'freebsd') { + $no_of_cpus = no_of_cpus_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cpus = no_of_cpus_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cpus = no_of_cpus_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cpus = no_of_cpus_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cpus = no_of_cpus_darwin(); + } elsif ($^O eq 'solaris') { + $no_of_cpus = no_of_cpus_solaris(); + } elsif ($^O eq 'aix') { + $no_of_cpus = no_of_cpus_aix(); + } elsif ($^O eq 'hpux') { + $no_of_cpus = no_of_cpus_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cpus = no_of_cpus_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cpus = no_of_cpus_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cpus = no_of_cpus_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cpus = no_of_cpus_tru64(); + } else { + $no_of_cpus = (no_of_cpus_gnu_linux() + || no_of_cpus_freebsd() + || no_of_cpus_netbsd() + || no_of_cpus_openbsd() + || no_of_cpus_hurd() + || no_of_cpus_darwin() + || no_of_cpus_solaris() + || no_of_cpus_aix() + || no_of_cpus_hpux() + || no_of_cpus_qnx() + || no_of_cpus_openserver() + || no_of_cpus_irix() + || 
no_of_cpus_tru64() + # Number of cores is better than no guess for #CPUs + || nproc() + ); + } + if($no_of_cpus) { + chomp $no_of_cpus; + return $no_of_cpus; + } else { + ::warning("Cannot figure out number of cpus. Using 1.\n"); + return 1; + } +} + +sub no_of_cores { + # Returns: + # Number of CPU cores + local $/="\n"; # If delimiter is set, then $/ will be wrong + my $no_of_cores; + if ($^O eq 'linux') { + $no_of_cores = no_of_cores_gnu_linux(); + } elsif ($^O eq 'freebsd') { + $no_of_cores = no_of_cores_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cores = no_of_cores_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cores = no_of_cores_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cores = no_of_cores_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cores = no_of_cores_darwin(); + } elsif ($^O eq 'solaris') { + $no_of_cores = no_of_cores_solaris(); + } elsif ($^O eq 'aix') { + $no_of_cores = no_of_cores_aix(); + } elsif ($^O eq 'hpux') { + $no_of_cores = no_of_cores_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cores = no_of_cores_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cores = no_of_cores_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cores = no_of_cores_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cores = no_of_cores_tru64(); + } else { + $no_of_cores = (no_of_cores_gnu_linux() + || no_of_cores_freebsd() + || no_of_cores_netbsd() + || no_of_cores_openbsd() + || no_of_cores_hurd() + || no_of_cores_darwin() + || no_of_cores_solaris() + || no_of_cores_aix() + || no_of_cores_hpux() + || no_of_cores_qnx() + || no_of_cores_openserver() + || no_of_cores_irix() + || no_of_cores_tru64() + || nproc() + ); + } + if($no_of_cores) { + chomp $no_of_cores; + return $no_of_cores; + } else { + ::warning("Cannot figure out number of CPU cores. 
Using 1.\n"); + return 1; + } +} + +sub nproc { + # Returns: + # Number of cores using `nproc` + my $no_of_cores = `nproc 2>/dev/null`; + return $no_of_cores; +} + +sub no_of_cpus_gnu_linux { + # Returns: + # Number of physical CPUs on GNU/Linux + # undef if not GNU/Linux + my $no_of_cpus; + my $no_of_cores; + if(-e "/proc/cpuinfo") { + $no_of_cpus = 0; + $no_of_cores = 0; + my %seen; + open(my $in_fh, "<", "/proc/cpuinfo") || return undef; + while(<$in_fh>) { + if(/^physical id.*[:](.*)/ and not $seen{$1}++) { + $no_of_cpus++; + } + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } + return ($no_of_cpus||$no_of_cores); +} + +sub no_of_cores_gnu_linux { + # Returns: + # Number of CPU cores on GNU/Linux + # undef if not GNU/Linux + my $no_of_cores; + if(-e "/proc/cpuinfo") { + $no_of_cores = 0; + open(my $in_fh, "<", "/proc/cpuinfo") || return undef; + while(<$in_fh>) { + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } + return $no_of_cores; +} + +sub no_of_cpus_freebsd { + # Returns: + # Number of physical CPUs on FreeBSD + # undef if not FreeBSD + my $no_of_cpus = + (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'` + or + `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`); + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_freebsd { + # Returns: + # Number of CPU cores on FreeBSD + # undef if not FreeBSD + my $no_of_cores = + (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'` + or + `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_netbsd { + # Returns: + # Number of physical CPUs on NetBSD + # undef if not NetBSD + my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_netbsd { + # Returns: + # Number of CPU cores on NetBSD + # undef if not NetBSD + my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; + chomp 
$no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_openbsd { + # Returns: + # Number of physical CPUs on OpenBSD + # undef if not OpenBSD + my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_openbsd { + # Returns: + # Number of CPU cores on OpenBSD + # undef if not OpenBSD + my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cpus = `nproc`; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cores = `nproc`; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_darwin { + # Returns: + # Number of physical CPUs on Mac Darwin + # undef if not Mac Darwin + my $no_of_cpus = + (`sysctl -n hw.physicalcpu 2>/dev/null` + or + `sysctl -a hw 2>/dev/null | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }'`); + return $no_of_cpus; +} + +sub no_of_cores_darwin { + # Returns: + # Number of CPU cores on Mac Darwin + # undef if not Mac Darwin + my $no_of_cores = + (`sysctl -n hw.logicalcpu 2>/dev/null` + or + `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); + return $no_of_cores; +} + +sub no_of_cpus_solaris { + # Returns: + # Number of physical CPUs on Solaris + # undef if not Solaris + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + return undef; +} + +sub no_of_cores_solaris { + # Returns: + # Number of CPU cores on Solaris + # undef if not Solaris + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + if(-x "/usr/sbin/prtconf") 
{ + my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + return undef; +} + +sub no_of_cpus_aix { + # Returns: + # Number of physical CPUs on AIX + # undef if not AIX + my $no_of_cpus = 0; + if(-x "/usr/sbin/lscfg") { + open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") + || return undef; + $no_of_cpus = <$in_fh>; + chomp ($no_of_cpus); + close $in_fh; + } + return $no_of_cpus; +} + +sub no_of_cores_aix { + # Returns: + # Number of CPU cores on AIX + # undef if not AIX + my $no_of_cores; + if(-x "/usr/bin/vmstat") { + open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef; + while(<$in_fh>) { + /lcpu=([0-9]*) / and $no_of_cores = $1; + } + close $in_fh; + } + return $no_of_cores; +} + +sub no_of_cpus_hpux { + # Returns: + # Number of physical CPUs on HP-UX + # undef if not HP-UX + my $no_of_cpus = + (`/usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'`); + return $no_of_cpus; +} + +sub no_of_cores_hpux { + # Returns: + # Number of CPU cores on HP-UX + # undef if not HP-UX + my $no_of_cores = + (`/usr/bin/mpsched -s 2>&1 | grep 'Processor Count' | awk '{ print \$3 }'`); + return $no_of_cores; +} + +sub no_of_cpus_qnx { + # Returns: + # Number of physical CPUs on QNX + # undef if not QNX + # BUG: It is now known how to calculate this. + my $no_of_cpus = 0; + return $no_of_cpus; +} + +sub no_of_cores_qnx { + # Returns: + # Number of CPU cores on QNX + # undef if not QNX + # BUG: It is now known how to calculate this. 
+ my $no_of_cores = 0; + return $no_of_cores; +} + +sub no_of_cpus_openserver { + # Returns: + # Number of physical CPUs on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cpus = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cpus; +} + +sub no_of_cores_openserver { + # Returns: + # Number of CPU cores on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cores = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cores; +} + +sub no_of_cpus_irix { + # Returns: + # Number of physical CPUs on IRIX + # undef if not IRIX + my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; + return $no_of_cpus; +} + +sub no_of_cores_irix { + # Returns: + # Number of CPU cores on IRIX + # undef if not IRIX + my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; + return $no_of_cores; +} + +sub no_of_cpus_tru64 { + # Returns: + # Number of physical CPUs on Tru64 + # undef if not Tru64 + my $no_of_cpus = `sizer -pr`; + return $no_of_cpus; +} + +sub no_of_cores_tru64 { + # Returns: + # Number of CPU cores on Tru64 + # undef if not Tru64 + my $no_of_cores = `sizer -pr`; + return $no_of_cores; +} + +sub sshcommand { + my $self = shift; + if (not defined $self->{'sshcommand'}) { + $self->sshcommand_of_sshlogin(); + } + return $self->{'sshcommand'}; +} + +sub serverlogin { + my $self = shift; + if (not defined $self->{'serverlogin'}) { + $self->sshcommand_of_sshlogin(); + } + return $self->{'serverlogin'}; +} + +sub sshcommand_of_sshlogin { + # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') + # 'user@server' -> ('ssh','user@server') + # 'myssh user@server' -> ('myssh','user@server') + # 'myssh -l user server' -> ('myssh -l user','server') + # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') + # Returns: + # sshcommand 
- defaults to 'ssh' + # login@host + my $self = shift; + my ($sshcmd, $serverlogin); + if($self->{'string'} =~ /(.+) (\S+)$/) { + # Own ssh command + $sshcmd = $1; $serverlogin = $2; + } else { + # Normal ssh + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + $sshcmd = "ssh -S ".$control_path; + $serverlogin = $self->{'string'}; + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + $SIG{'TERM'} = undef; + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument" + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + # Run a sleep that outputs data, so it will discover if the ssh connection closes. + my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}'); + my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep); + exec(@master); + } + } + } else { + $sshcmd = "ssh"; $serverlogin = $self->{'string'}; + } + } + $self->{'sshcommand'} = $sshcmd; + $self->{'serverlogin'} = $serverlogin; +} + +sub control_path_dir { + # Returns: + # path to directory + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + $self->{'control_path_dir'} = + File::Temp::tempdir($ENV{'HOME'} + . 
"/.parallel/tmp/control_path_dir-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file, " is not readable and will not be transferred.\n"); + return "true"; + } + my $rsync_destdir; + if($file =~ m:^/:) { + # rsync /foo/bar / + $rsync_destdir = "/"; + } else { + $rsync_destdir = ::shell_quote_file($workdir); + } + $file = ::shell_quote_file($file); + my $sshcmd = $self->sshcommand(); + my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); + my $serverlogin = $self->serverlogin(); + # Make dir if it does not exist + return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" . + rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; +} + +sub cleanup_cmd { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . 
::shell_quote_file($workdir).';'; + } + + $f = ::shell_quote_file($f); + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)"); +} + +{ + my $rsync; + + sub rsync { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + if(not $rsync) { + my @out = `rsync --version`; + for (@out) { + if(/version (\d+.\d+)(.\d+)?/) { + if($1 >= 3.1) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync = "rsync --protocol 30"; + } else { + $rsync = "rsync"; + } + } + } + $rsync or ::die_bug("Cannot figure out version of rsync: @out"); + } + return $rsync; + } +} + + +package JobQueue; + +sub new { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $return_files = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $return_files); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + + if(@{$self->{'unget'}}) { + my $job = shift @{$self->{'unget'}}; + return ($job); + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined $commandline) { + my $job = Job->new($commandline); + return $job; + } else { + return undef; + } + } +} + +sub unget { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) + && $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + my $job; + my @queue; + my $start = time; + while($job = $self->get()) { + if(time - $start > 10) { + ::warning("Reading all arguments takes 
longer than 10 seconds.\n"); + $opt::eta && ::warning("Consider removing --eta.\n"); + $opt::bar && ::warning("Consider removing --bar.\n"); + last; + } + push @queue, $job; + } + while($job = $self->get()) { + push @queue, $job; + } + + $self->unget(@queue); + $self->{'total_jobs'} = $#queue+1; + } + return $self->{'total_jobs'}; +} + +sub next_seq { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + 'stdin' => undef, # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + 'remaining' => "", # remaining data not sent to stdin (used for --pipe) + 'datawritten' => 0, # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + }, ref($class) || $class; +} + +sub replaced { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +sub seq { + my $self = shift; + return $self->{'commandline'}->seq(); +} + +sub slot { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +{ + my($cattail); + + sub cattail { + # Returns: + # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] + if(not $cattail) { + $cattail = q{ + # cat followed by tail. 
+ # If $writerpid dead: finish after this round + use Fcntl; + + $|=1; + + my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,32768); + if($read) { + # We can unlink the file now: The writer has written something + -e $unlink_file and unlink $unlink_file; + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be more to read => exit + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 30 milliseconds + $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. 
+ my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + if($opt::results) { + my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # Output in: prefix/name1/val1/name2/val2/stdout + my $dir = $opt::results."/".$args_as_dirname; + if(eval{ File::Path::mkpath($dir); }) { + # OK + } else { + # mkpath failed: Argument probably too long. + # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + $dir = $opt::results."/".$args_as_dirname; + File::Path::mkpath($dir); + } + # prefix/name1/val1/name2/val2/stdout + $outname = "$dir/stdout"; + if(not open($outfhw, "+>", $outname)) { + ::error("Cannot write to `$outname'.\n"); + ::wait_and_exit(255); + } + # prefix/name1/val1/name2/val2/stderr + $errname = "$dir/stderr"; + if(not open($errfhw, "+>", $errname)) { + ::error("Cannot write to `$errname'.\n"); + ::wait_and_exit(255); + } + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if(@Global::tee_jobs) { + # files must be removed when the tee is done + } elsif($opt::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + 
$self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fd{1}) || die; + open($errfhw,">&",$Global::fd{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + for my $fdno (1,2) { + my $wpid = open(my $fdw,"|-","$opt::compress_program >>". + $self->fh($fdno,'name')) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } + } elsif(not $opt::ungroup) { + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed separately + # and fdr can be seeked separately (for --line-buffer) + open(my $fdr,"<", $self->fh($fdno,'name')) || + ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if required + $Global::debug or unlink $self->fh($fdno,"unlink"); + } + } + if($opt::linebuffer) { + # Set non-blocking when using --linebuffer + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + for my $fdno (1,2) { + my $fdr = $self->fh($fdno,'r'); + my $flags; + fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &O_NONBLOCK; # Add non-blocking to the flags + fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the 
filehandle + } + } +} + +sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + my $testdir = shift; + + my $upper = 8_000_000; + my $len = 8; + my $dir="x"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir="x"x$len; + } while (mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir="x"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; +} + +sub set_fh { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + syswrite($stdin_fh,$$remaining_ref); +} + +sub set_stdin_buffer { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $block_ref = ref to $block to pass on + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_; + $self->{'stdin_buffer'} = ($self->virgin() ? 
$$header_ref : "").substr($$block_ref,0,$endpos); + if($opt::remove_rec_sep) { + remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend); + } + $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'}; + $self->{'stdin_buffer_pos'} = 0; +} + +sub stdin_buffer_length { + my $self = shift; + return $self->{'stdin_buffer_length'}; +} + +sub remove_rec_sep { + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + $$block_ref =~ s/$recend$recstart//gos; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; +} + +sub non_block_write { + my $self = shift; + my $something_written = 0; + use POSIX qw(:errno_h); +# use Fcntl; +# my $flags = ''; + for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) { + my $in = $self->fh(0,"w"); +# fcntl($in, F_GETFL, $flags) +# or die "Couldn't get flags for HANDLE : $!\n"; +# $flags |= O_NONBLOCK; +# fcntl($in, F_SETFL, $flags) +# or die "Couldn't set flags for HANDLE: $!\n"; + my $rv = syswrite($in, $buf); + if (!defined($rv) && $! 
== EAGAIN) { + # would block + $something_written = 0; + } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) { + # incomplete write + # Remove the written part + $self->{'stdin_buffer_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + my $a=""; + $self->set_stdin_buffer(\$a,\$a,"",""); + $something_written = $rv; + } + } + + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; +} + +sub runtime { + # Returns: + # Run time in seconds + my $self = shift; + return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; +} + +sub timedout { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill { + # Kill the job. + # Send the signals to (grand)*children and pid. + # If no signals: TERM TERM KILL + # Wait 200 ms after each TERM. 
+ # Input: + # @signals = signals to send + my $self = shift; + my @signals = @_; + my @family_pids = $self->family_pids(); + # Record this jobs as failed + $self->set_exitstatus(-1); + # Send two TERMs to give time to clean up + ::debug("run", "Kill seq ", $self->seq(), "\n"); + my @send_signals = @signals || ("TERM", "TERM", "KILL"); + for my $signal (@send_signals) { + my $alive = 0; + for my $pid (@family_pids) { + if(kill 0, $pid) { + # The job still running + kill $signal, $pid; + $alive = 1; + } + } + # If a signal was given as input, do not do the sleep below + @signals and next; + + if($signal eq "TERM" and $alive) { + # Wait up to 200 ms between TERMs - but only if any pids are alive + my $sleep = 1; + for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200; + $sleepsum += $sleep) { + $sleep = ::reap_usleep($sleep); + } + } + } +} + +sub family_pids { + # Find the pids with this->pid as (grand)*parent + # Returns: + # @pids = pids of (grand)*children + my $self = shift; + my $pid = $self->pid(); + my @pids; + + my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); + + my @more = ($pid); + # While more (grand)*children + while(@more) { + my @m; + push @pids, @more; + for my $parent (@more) { + if($children_of_ref->{$parent}) { + # add the children of this parent + push @m, @{$children_of_ref->{$parent}}; + } + } + @more = @m; + } + return (@pids); +} + +sub failed { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + 
$self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here { + # increase the number of times failed for this $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +sub wrapped { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_partials) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # $Global::envvar + # $opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_partials + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $Global::envvar.$self->replaced(); + if($opt::shellquote) { + # Prepend echo + # and quote twice + $command = "echo " . 
+ ::shell_quote_scalar(::shell_quote_scalar($command)); + } + if($opt::nice) { + # Prepend \nice -n19 $SHELL -c + # and quote. + # The '\' before nice is needed to avoid tcsh's built-in + $command = '\nice'. " -n". $opt::nice. " ". + $Global::shell. " -c ". + ::shell_quote_scalar($command); + } + if($opt::cat) { + # Prepend 'cat > {};' + # Append '_EXIT=$?;(rm {};exit $_EXIT)' + $command = + $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0). + $command. + $self->{'commandline'}->replace_placeholders( + ["; _EXIT=\$?; rm \257<\257>; exit \$_EXIT"], 0, 0); + } elsif($opt::fifo) { + # Prepend 'mkfifo {}; (' + # Append ') & _PID=$!; cat > {}; wait $_PID; _EXIT=$?;(rm {};exit $_EXIT)' + $command = + $self->{'commandline'}->replace_placeholders(["mkfifo \257<\257>; ("], 0, 0). + $command. + $self->{'commandline'}->replace_placeholders([") & _PID=\$!; cat > \257<\257>; ", + "wait \$_PID; _EXIT=\$?; ", + "rm \257<\257>; exit \$_EXIT"], + 0,0); + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_partials) { + # Prepend: + # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 | + $command = (shift @Global::cat_partials). "|". "(". $command. ")"; + } elsif($opt::pipe) { + # Prepend EOF-detector to avoid starting $command if EOF. + # The $tmpfile might exist if run on a remote system - we accept that risk + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); + # Unlink to avoid leaving files if --dry-run or --sshlogin + unlink $tmpfile; + $command = + # Exit value: + # empty input = true + # some input = exit val from command + qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }. + qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }. + qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }. 
+ "($command);"; + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; +} + +sub sshlogin { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub sshlogin_wrap { + # Wrap the command with the commands needed to run remotely + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + my $self = shift; + my $command = shift; + if(not defined $self->{'sshlogin_wrap'}) { + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my ($pre,$post,$cleanup)=("","",""); + + if($serverlogin eq ":") { + # No transfer neeeded + $self->{'sshlogin_wrap'} = $command; + } else { + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; + } + # If the remote login shell is (t)csh then use 'setenv' + # otherwise use 'export' + # We cannot use parse_env_var(), as PARALLEL_SEQ changes + # for each command + my $parallel_env = + ($Global::envwarn + . q{ 'eval `echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null } + . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; } + . q{ setenv PARALLEL_PID '$PARALLEL_PID' } + . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; } + . 
q{ PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' }); + my $remote_pre = ""; + my $ssh_options = ""; + if(($opt::pipe or $opt::pipepart) and $opt::ctrlc + or + not ($opt::pipe or $opt::pipepart) and not $opt::noctrlc) { + # TODO Determine if this is needed + # Propagating CTRL-C to kill remote jobs requires + # remote jobs to be run with a terminal. + $ssh_options = "-tt -oLogLevel=quiet"; +# $ssh_options = ""; + # tty - check if we have a tty. + # stty: + # -onlcr - make output 8-bit clean + # isig - pass CTRL-C as signal + # -echo - do not echo input + $remote_pre .= ::shell_quote_scalar('tty >/dev/null && stty isig -onlcr -echo;'); + } + if($opt::workdir) { + my $wd = ::shell_quote_file($self->workdir()); + $remote_pre .= ::shell_quote_scalar("mkdir -p ") . $wd . + ::shell_quote_scalar("; cd ") . $wd . + # exit 255 (instead of exec false) would be the correct thing, + # but that fails on tcsh + ::shell_quote_scalar(qq{ || exec false;}); + } + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # It works on Linux but not Solaris + # Finishes on Solaris, but wrong exit code: + # $SIG{CHLD} = sub {exit ($?&127 ? 128+($?&127) : 1+$?>>8)}; + # Hangs on Solaris, but correct exit code on Linux: + # $SIG{CHLD} = sub { $done = 1 }; + # $p->poll; + my $signal_script = "perl -e '". + q{ + use IO::Poll; + $SIG{CHLD} = sub { $done = 1 }; + $p = IO::Poll->new; + $p->mask(STDOUT, POLLHUP); + $pid=fork; unless($pid) {setpgrp; exec $ENV{SHELL}, "-c", @ARGV; die "exec: $!\n"} + $p->poll; + kill SIGHUP, -${pid} unless $done; + wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8) + } . "' "; + $signal_script =~ s/\s+/ /g; + + $self->{'sshlogin_wrap'} = + ($pre + . "$sshcmd $ssh_options $serverlogin $parallel_env " + . $remote_pre +# . ::shell_quote_scalar($signal_script . ::shell_quote_scalar($command)) + . ::shell_quote_scalar($command) + . ";" + . 
$post); + } + } + return $self->{'sshlogin_wrap'}; +} + +sub transfer { + # Files to transfer + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + my @transfer = (); + $self->{'transfersize'} = 0; + if($opt::transfer) { + for my $record (@{$self->{'commandline'}{'arg_list'}}) { + # Merge arguments from records into args + for my $arg (@$record) { + CORE::push @transfer, $arg->orig(); + # filesize + if(-e $arg->orig()) { + $self->{'transfersize'} += (stat($arg->orig()))[7]; + } + } + } + } + return @transfer; +} + +sub transfersize { + my $self = shift; + return $self->{'transfersize'}; +} + +sub sshtransfer { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub sshreturn { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:); # Is the path relative? 
+ my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); + my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". + $basename . " ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= "$sshcmd $serverlogin rm -rf " . 
::shell_quote_scalar($workdir).';'; + } + return $cleancmd; +} + +sub cleanup { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub workdir { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $opt::workdir; + # Rsync treats /./ special. We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:/+$::; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = ::shell_quote_scalar($workdir); + } + return $self->{'workdir'}; +} + +sub parentdirs_of { + # Return: + # all parentdirs except . 
of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start { + # Setup STDOUT and STDERR for a job and start it. + # Returns: + # job-object or undef if job not to run + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + + if($Global::interactive or $Global::stderr_verbose) { + if($Global::interactive) { + print $Global::original_stderr "$command ?..."; + open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); + my $answer = <$tty_fh>; + close $tty_fh; + my $run_yes = ($answer =~ /^\s*y/i); + if (not $run_yes) { + $command = "true"; # Run the command 'true' + } + } else { + print $Global::original_stderr "$command\n"; + } + } + + my $pid; + $job->openoutputfiles(); + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!"); + + if(($opt::dryrun or $Global::verbose) and $opt::ungroup) { + if($Global::verbose <= 1) { + print $stdout_fh $job->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $stdout_fh $command,"\n"; + } + } + if($opt::dryrun) { + $command = "true"; + } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + ::debug("run", $Global::total_running, " processes . 
Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-pipe"); + 1; + }; + $job->set_fh(0,"w",$stdin_fh); + } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 + and $job->sshlogin()->string() eq ":") { + # Give STDIN to the first job if using -a (but only if running + # locally - otherwise CTRL-C does not work for other jobs Bug#36585) + *IN = *STDIN; + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-a"); + 1; + }; + # Re-open to avoid complaining + open(STDIN, "<&", $Global::original_stdin) + or ::die_bug("dup-\$Global::original_stdin: $!"); + } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-/dev/tty"); + $Global::tty_taken = $pid; + close $devtty_fh; + 1; + }; + } else { + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-gensym"); + 1; + }; + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub tmux_wrap { + # Wrap command with tmux for session pPID + # Input: + # 
$actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx"); + $Global::unlink{$tmpfile}=1; + close $fh; + unlink $tmpfile; + my $visual_command = $self->replaced(); + my $title = $visual_command; + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]//d; + + my $tmux; + if($Global::total_running == 0) { + $tmux = "tmux new-session -s p$$ -d -n ". + ::shell_quote_scalar($title); + print $Global::original_stderr "See output with: tmux attach -t p$$\n"; + } else { + $tmux = "tmux new-window -t p$$ -n ".::shell_quote_scalar($title); + } + return "mkfifo $tmpfile; $tmux ". + # Run in tmux + ::shell_quote_scalar( + "(".$actual_command.');(echo $?$status;echo 255) >'.$tmpfile."&". + "echo ".::shell_quote_scalar($visual_command).";". + "echo \007Job finished at: `date`;sleep 10"). + # Run outside tmux + # Read the first line from the fifo and use that as status code + "; exit `perl -ne 'unlink \$ARGV; 1..1 and print' $tmpfile` "; +} + +sub is_already_in_results { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + my $dir = $opt::results."/".$args_as_dirname; + ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); + return -e "$dir/stdout"; +} + +sub is_already_in_joblog { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub should_be_retried { + # Should this job be retried? 
+ # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if (not $opt::retries) { + return 0; + } + if(not $self->exitstatus()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0 + } else { + # The job failed. Should it be retried? + $self->add_failed_here(); + if($self->total_failed() == $opt::retries) { + # This has been retried enough + return 0; + } else { + # This command should be retried + $self->set_endtime(undef); + $Global::JobQueue->unget($self); + ::debug("run", "Retry ", $self->seq(), "\n"); + return 1; + } + } +} + +sub print { + # Print the output of the jobs + # Returns: N/A + + my $self = shift; + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + unlink $self->fh(1,"name"); + } + if($opt::pipe and $self->virgin()) { + # Skip --joblog, --dryrun, --verbose + } else { + if($Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + } + + # Printing is only relevant for grouped/--line-buffer output. 
+ $opt::ungroup and return; + # Check for disk full + exit_if_disk_full(); + + if(($opt::dryrun or $Global::verbose) + and + not $self->{'verbose_printed'}) { + $self->{'verbose_printed'}++; + if($Global::verbose <= 1) { + print STDOUT $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print STDOUT $self->wrapped(),"\n"; + } + # If STDOUT and STDERR are merged, + # we want the command to be printed first + # so flush to avoid STDOUT being buffered + flush STDOUT; + } + } + for my $fdno (sort { $a <=> $b } keys %Global::fd) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fd = $Global::fd{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; + } + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):"); + if($opt::files) { + # If --compress: $in_fh must be closed first. + close $self->fh($fdno,"w"); + close $in_fh; + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files if --files was set + for my $fdno (1,2) { + unlink $self->fh($fdno,"name"); + unlink $self->fh($fdno,"unlink"); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fd $self->fh($fdno,"name"),"\n"; + } + } elsif($opt::linebuffer) { + # Line buffered print out + $self->linebuffer_print($fdno,$in_fh,$out_fd); + } else { + my $buf; + close $self->fh($fdno,"w"); + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + if($opt::tag or defined $opt::tagstring) { + my $tag = $self->tag(); + if($fdno == 2) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. 
+ while(<$in_fh>) { + if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { + # Skip + } else { + print $out_fd $tag,$_; + } + # At most run the loop once + last; + } + } + while(<$in_fh>) { + print $out_fd $tag,$_; + } + } else { + my $buf; + if($fdno == 2) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + sysread($in_fh,$buf,1_000); + $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; + print $out_fd $buf; + } + while(sysread($in_fh,$buf,32768)) { + print $out_fd $buf; + } + } + close $in_fh; + } + flush $out_fd; + } + ::debug("print", "<<joboutput @command\n"); +} + +sub linebuffer_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $partial = \$self->{'partial_line',$fdno}; + + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($opt::compress) { + # Blocked reading in final round + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + for my $fdno (1,2) { + my $fdr = $self->fh($fdno,'r'); + my $flags; + fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags + fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + } + } + # This seek will clear EOF + seek $in_fh, tell($in_fh), 0; + # The read is non-blocking: The $in_fh is set to non-blocking. 
+ # 32768 --tag = 5.1s + # 327680 --tag = 4.4s + # 1024000 --tag = 4.4s + # 3276800 --tag = 4.3s + # 32768000 --tag = 4.7s + # 10240000 --tag = 4.3s + while(read($in_fh,substr($$partial,length $$partial),3276800)) { + # Append to $$partial + # Find the last \n + my $i = rindex($$partial,"\n"); + if($i != -1) { + # One or more complete lines were found + if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; + # Length of partial line has changed: Find the last \n again + $i = rindex($$partial,"\n"); + } + if($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + my $tag = $self->tag(); + substr($$partial,0,$i+1) =~ s/^/$tag/gm; + # Length of partial line has changed: Find the last \n again + $i = rindex($$partial,"\n"); + } + # Print up to and including the last \n + print $out_fd substr($$partial,0,$i+1); + # Remove the printed part + substr($$partial,0,$i+1)=""; + } + } + if(defined $self->{'exitstatus'}) { + # If the job is dead: print the remaining partial line + # read remaining + if($$partial and ($opt::tag or defined $opt::tagstring)) { + my $tag = $self->tag(); + $$partial =~ s/^/$tag/gm; + } + print $out_fd $$partial; + # Release the memory + $$partial = undef; + if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + } + } +} + +sub print_joblog { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = "@command"; + } + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), 
$self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag { + my $self = shift; + if(not defined $self->{'tag'}) { + $self->{'tag'} = $self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)."\t"; + } + return $self->{'tag'}; +} + +sub hostgroups { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } +} + +sub exitsignal { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; +} + +{ + my ($disk_full_fh, $b8193, $name); + sub exit_if_disk_full { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. 
+ # Returns: + # N/A + if(not $disk_full_fh) { + ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); + unlink $name; + $b8193 = "x"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh == 0) { + ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n"); + ::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + + +package CommandLine; + +sub new { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $return_files = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'return_files' => $return_files, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub seq { + my $self = shift; + return $self->{'seq'}; +} + +{ + my 
$max_slot_number; + + sub slot { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $Global::max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; + } +} + +sub populate { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::minimal_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $CommandLine::already_spread + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length(); + + if($opt::cat or $opt::fifo) { + # Generate a tempfile name that will be used as {} + my($outfh,$name) = ::tmpfile(SUFFIX => ".pip"); + close $outfh; + # Unlink is needed if: ssh otheruser@localhost + unlink $name; + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget([Arg->new($name)]); + } + + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); + last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (", + $self->len(), " >= ", + $max_len, + ") at number ", + $self->{'arg_queue'}->arg_number(), + ": ". 
+ (substr($args,0,50))."...\n"); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $CommandLine::already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $CommandLine::already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + } +} + +sub push { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list'}}, $record; + + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + my $rep; + for my $arg (@$record) { + if(defined $arg) { + for my $perlexpr (keys %{$self->{'replacecount'}}) { + # 50% faster than below + $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self); + # $rep = $arg->replace($perlexpr,$quote_arg,$self); + # $self->{'len'}{$perlexpr} += length $rep; + # ::debug("length", "Length: ", length $rep, + # "(", $perlexpr, "=>", $rep, ")\n"); + } + } + } +} + +sub pop { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + my $quote_arg = $Global::noquote ? 
0 : not $Global::quoting; + for my $arg (@$record) { + if(defined $arg) { + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + return $record; +} + +sub pop_all { + # Remove all arguments and zeros the length of replacement strings + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $replacement_string (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$replacement_string} = 0; + } + $self->{'arg_list'} = []; + return @popped; +} + +sub number_of_args { + # The number of records + # Returns: + # number of records + my $self = shift; + # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd + # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az + # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq + # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue. + # + # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG + # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq + # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq + # eagdoq oapq. + # + # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz + # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq + # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq + # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq + # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq + # eagdoq oapq U daf13'qp ftq eagdoq oapq + # tffb://qz.iuwubqpum.ads/iuwu/DAF13 + # + # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk + # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq + # tmp fa nq daf13'qp. + # + # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita + # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq + # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz. 
+ # + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string { + # Returns: + # all unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub args_as_dirname { + # Returns: + # all unmodified arguments joined with '/' (similar to {}) + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my @res = (); + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + map { my $s = $_; + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + $s =~ s/\\/\\\\/g; + $s =~ s/\t/\\t/g; + $s =~ s/\0/\\0/g; + $s =~ s:/:\\_:g; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig()); + } + } + return join "/", @res; +} + +sub header_indexes_sorted { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. 
If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. $max_col; + return @header_indexes_sorted; +} + +sub len { + # Uses: + # $opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between regargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + 
$self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + } + } + if($opt::nice) { + # Pessimistic length if --nice is set + # Worse than worst case: every char needs to be quoted with \ + $len *= 2; + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: every char needs to be quoted with \ + $len *= 2; + } + if($opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: every char needs to be quoted with \ twice + $len *= 4; + } + # If we are using --env, add the prefix for that, too. + $len += $Global::envvarlen; + + return $len; +} + +sub replaced { + # Uses: + # $Global::noquote + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + $self->{'replaced'} = $self->replace_placeholders($self->{'command'},$Global::quoting,$quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +sub replace_placeholders { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? 
+ # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my $context_replace = $self->{'context_replace'}; + my @target = @$targetref; + ::debug("replace", "Replace @target\n"); + # -X = context replace + # maybe multiple input sources + # maybe --xapply + if(not @target) { + # @target is empty: Return empty array + return @target; + } + # Fish out the words that have replacement strings in them + my %word; + for (@target) { + my $tt = $_; + ::debug("replace", "Target: $tt"); + # a{1}b{}c{}d + # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d + # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d + # A B C => aAbA B CcA B Cd + # -X A B C => aAbAcAd aAbBcBd aAbCcCd + + if($context_replace) { + while($tt =~ s/([^\s\257]* # before {= + (?: + \257< # {= + [^\257]*? # The perl expression + \257> # =} + [^\s\257]* # after =} + )+)/ /x) { + # $1 = pre \257 perlexpr \257 post + $word{"$1"} ||= 1; + } + } else { + while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { + # $f = \257 perlexpr \257 + $word{$1} ||= 1; + } + } + } + my @word = keys %word; + + my %replace; + my @arg; + for my $record (@{$self->{'arg_list'}}) { + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # Merge arg-objects from records into @arg for easy access + CORE::push @arg, @$record; + } + # Add one arg if empty to allow {#} and {%} to be computed only once + if(not @arg) { @arg = (Arg->new("")); } + # Number of arguments - used for positional arguments + my $n = $#_+1; + + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + for my $word (@word) { + # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF + my $w = $word; + ::debug("replace", "Replacing in $w\n"); + + # Replace positional arguments + $w =~ s< ([^\s\257]*) # before {= + \257< # {= + (-?\d+) # Position (eg. -2 or 3) + ([^\257]*?) 
# The perl expression + \257> # =} + ([^\s\257]*) # after =} + > + { $1. # Context (pre) + ( + $arg[$2 > 0 ? $2-1 : $n+$2] ? # If defined: replace + $arg[$2 > 0 ? $2-1 : $n+$2]->replace($3,$quote_arg,$self) + : "") + .$4 }egx;# Context (post) + ::debug("replace", "Positional replaced $word with: $w\n"); + + if($w !~ /\257/) { + # No more replacement strings in $w: No need to do more + if($quote) { + CORE::push(@{$replace{::shell_quote($word)}}, $w); + } else { + CORE::push(@{$replace{$word}}, $w); + } + next; + } + # for each arg: + # compute replacement for each string + # replace replacement strings with replacement in the word value + # push to replace word value + ::debug("replace", "Positional done: $w\n"); + for my $arg (@arg) { + my $val = $w; + my $number_of_replacements = 0; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + # Replace {= perl expr =} with value for each arg + $number_of_replacements += + $val =~ s{\257<\Q$perlexpr\E\257>} + {$arg ? $arg->replace($perlexpr,$quote_arg,$self) : ""}eg; + } + my $ww = $word; + if($quote) { + $ww = ::shell_quote_scalar($word); + $val = ::shell_quote_scalar($val); + } + if($number_of_replacements) { + CORE::push(@{$replace{$ww}}, $val); + } + } + } + + if($quote) { + @target = ::shell_quote(@target); + } + # ::debug("replace", "%replace=",::my_dump(%replace),"\n"); + if(%replace) { + # Substitute the replace strings with the replacement values + # Must be sorted by length if a short word is a substring of a long word + my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } + sort { length $b <=> length $a } keys %replace); + for(@target) { + s/($regexp)/join(" ",@{$replace{$1}})/ge; + } + } + ::debug("replace", "Return @target\n"); + return wantarray ? 
@target : "@target"; +} + + +package CommandLineQueue; + +sub new { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $return_files = shift; + my @unget = (); + my ($count,%replacecount,$posrpl,$perlexpr,%len); + my @command = @$commandref; + # If the first command start with '-' it is probably an option + if($command[0] =~ /^\s*(-\S+)/) { + # Is this really a command in $PATH starting with '-'? + my $cmd = $1; + if(not ::which($cmd)) { + ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n"); + ::wait_and_exit(255); + } + } + # Replace replacement strings with {= perl expr =} + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \257< and \257> + for(@command) { + if(/\257/) { + ::error("Command cannot contain the character \257. Use a function for that.\n"); + ::wait_and_exit(255); + } + s/\Q$Global::parensleft\E(.*?)\Q$Global::parensright\E/\257<$1\257>/gx; + } + for my $rpl (keys %Global::rpl) { + # Replace the short hand string with the {= perl expr =} in $command and $opt::tagstring + # Avoid replacing inside existing {= perl expr =} + for(@command,@Global::ret_files) { + while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> + \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/xg) { + } + } + if(defined $opt::tagstring) { + for($opt::tagstring) { + while(s/((^|\257>)[^\257]*?) 
# Don't replace after \257 unless \257> + \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/x) {} + } + } + # Do the same for the positional replacement strings + # A bit harder as we have to put in the position number + $posrpl = $rpl; + if($posrpl =~ s/^\{//) { + # Only do this if the shorthand start with { + for(@command,@Global::ret_files) { + s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g; + } + if(defined $opt::tagstring) { + $opt::tagstring =~ s/\{(-?\d+)\Q$posrpl\E/\257<$1 $perlexpr\257>/g; + } + } + } + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + $replacecount{$1} ++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \257< + while($c =~ s/ (\S*\000\S*) //x) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + if($opt::tagstring) { + my $t = $opt::tagstring; + while($t =~ s/ \257<([^\257]*)\257> //x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1}||=1; + } + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + # Default command = {} + # If not replacement string: append {} 
+ if(not @command) { + @command = ("\257<\257>"); + $Global::noquote = 1; + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\257<\257>"); + } + } + } + + return bless { + 'unget' => \@unget, + 'command' => \@command, + 'replacecount' => \%replacecount, + 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), + 'context_replace' => $context_replace, + 'len' => \%len, + 'max_number_of_args' => $max_number_of_args, + 'size' => undef, + 'return_files' => $return_files, + 'seq' => 1, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + my $cmd_line; + $cmd_line = CommandLine->new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'return_files'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + $cmd_line->populate(); + ::debug("init","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if($opt::pipe or $opt::pipepart) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe must have a command to pipe into (e.g. 'cat').\n"); + ::wait_and_exit(255); + } + } else { + if($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? 
+ return undef; + } elsif($cmd_line->replaced() eq "") { + # Empty command - get the next instead + return $self->get(); + } + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + +sub size { + my $self = shift; + if(not $self->{'size'}) { + my @all_lines = (); + while(not $self->{'arg_queue'}->empty()) { + push @all_lines, CommandLine->new($self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}); + } + $self->{'size'} = @all_lines; + $self->unget(@all_lines); + } + return $self->{'size'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); + my $cached_limit; + if(-e $len_cache) { + open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); + $cached_limit = <$fh>; + close $fh; + } else { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + mkdir($ENV{'HOME'} . "/.parallel"); + mkdir($ENV{'HOME'} . 
"/.parallel/tmp"); + open(my $fh, ">", $len_cache); + print $fh $cached_limit; + close $fh; + } + $Limits::Command::line_max_len = $cached_limit; + if($opt::max_chars) { + if($opt::max_chars <= $cached_limit) { + $Limits::Command::line_max_len = $opt::max_chars; + } else { + ::warning("Value for -s option ", + "should be < $cached_limit.\n"); + } + } + } + return $Limits::Command::line_max_len; +} + +sub real_max_length { + # Find the max_length of a command line + # Returns: + # The maximal command line length + # Use an upper bound of 8 MB if the shell allows for for infinite long lengths + my $upper = 8_000_000; + my $len = 8; + do { + if($len > $upper) { return $len }; + $len *= 16; + } while (is_acceptable_command_line_length($len)); + # Then search for the actual max length between 0 and upper bound + return binary_find_max_length(int($len/16),$len); +} + +sub binary_find_max_length { + # Given a lower and upper bound find the max_length of a command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper) = (@_); + if($lower == $upper or $lower == $upper-1) { return $lower; } + my $middle = int (($upper-$lower)/2 + $lower); + ::debug("init", "Maxlen: $lower,$upper,$middle : "); + if (is_acceptable_command_line_length($middle)) { + return binary_find_max_length($middle,$upper); + } else { + return binary_find_max_length($lower,$middle); + } +} + +sub is_acceptable_command_line_length { + # Test if a command line of this length can run + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + + local *STDERR; + open (STDERR, ">", "/dev/null"); + system "true "."x"x$len; + close STDERR; + ::debug("init", "$len=$? 
"); + return not $?; +} + + +package RecordQueue; + +sub new { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($colsep) { + # Open one file with colsep + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + return shift @{$self->{'unget'}}; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + return [Arg->new("")]; + } else { + return $ret; + } +} + +sub unget { + my $self = shift; + ::debug("run", "RecordQueue-unget '@_'\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = not @{$self->{'unget'}}; + $empty &&= $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub arg_number { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my $unget_ref=$self->{'unget'}; + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + 
my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } else { + push @out_record, Arg->new(""); + } + } + return \@out_record; + } else { + return undef; + } +} + +sub unget { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh) { + ::warning("Input is read from the terminal. ". + "Only experts do this on purpose. ". + "Press CTRL-D to exit.\n"); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + if($opt::xapply) { + return $self->xapply_get(); + } else { + return $self->nest_get(); + } +} + +sub unget { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @Global::unget_argv + and not @{$self->{'unget'}}); + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub xapply_get { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend = undef; + my $empty = 1; + for my $fh (@{$self->{'fhs'}}) { + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}{$fh}}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}{$fh}}, shift 
@{$self->{'arg_matrix'}{$fh}}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend = undef; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. 
+ if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}]; + } + $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. $no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + return shift @{$self->{'unget'}}; + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +sub read_arg_from_fh { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend = undef; + my $arg; + do {{ + # This makes 10% faster + if(not ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } +# ::debug("run", "read $arg\n"); + # Remove delimiter + $arg =~ s:$/$::; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { 
+ $arg = $prepend.$arg; # For line continuation + $prepend = undef; #undef; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } +} + +sub expand_combinations { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) + # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + # If there are more columns: Compute those recursively + my @rest = expand_combinations(@_); + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + ::warning("No such hostgroup (@hostgroups)\n"); + @hostgroups = (keys %Global::hostgroups); + } + } else { + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub replace { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = (shift) ? 1 : 0; # should the string be quoted? 
+ # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = shift; + $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace + if(not defined $self->{"rpl",0,$perlexpr}) { + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $Global::perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($Global::perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@\n"); + ::wait_and_exit(255); + } + } + # Execute the function + $Global::perleval{$perlexpr}->($job); + $self->{"rpl",0,$perlexpr} = $_; + } + if(not defined $self->{"rpl",$quote,$perlexpr}) { + $self->{"rpl",1,$perlexpr} = + ::shell_quote_scalar($self->{"rpl",0,$perlexpr}); + } + return $self->{"rpl",$quote,$perlexpr}; +} + +sub orig { + my $self = shift; + return $self->{'orig'}; +} + +sub trim_of { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? $_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr.\n"); + ::wait_and_exit(255); + } + return wantarray ? 
@strings : "@strings"; +} + + +package TimeoutQueue; + +sub new { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_delta_time { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. 
No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in ~/.parallel/semaphores/id-<name> Each +# file in ~/.parallel/semaphores/id-<name>/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new { + my $class = shift; + my $id = shift; + my $count = shift; + $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id="id-".$id; # To distinguish it from a process id + my $parallel_dir = $ENV{'HOME'}."/.parallel"; + -d $parallel_dir or mkdir_or_die($parallel_dir); + my $parallel_locks = $parallel_dir."/semaphores"; + -d $parallel_locks or mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub acquire { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + $self->atomic_link_if_count_less_than() and last; + ::debug("sem", "Remove dead locks"); + my $lockdir = $self->{'lockdir'}; + 
for my $d (glob "$lockdir/*") { + ::debug("sem", "Lock $d $lockdir\n"); + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(not kill 0, $1) { + ::debug("sem", "Dead: $d"); + unlink $d; + } else { + ::debug("sem", "Alive: $d"); + } + } + } + # try again + $self->atomic_link_if_count_less_than() and last; + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if(defined($opt::timeout) and + $start_time + $opt::timeout > time) { + # Acquire the lock anyway + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release { + my $self = shift; + unlink $self->{'pidfile'}; + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + unlink $self->{'idfile'}; + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub _release { + my $self = shift; + + unlink $self->{'pidfile'}; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem", $nlinks, "<", $self->{'count'}); + if($nlinks-- > 1) { + unlink $self->{'idfile'}; + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + print $fh "#"x$nlinks; + close $fh; + } else { + unlink $self->{'idfile'}; + rmdir $self->{'lockdir'}; + } + $self->unlock(); + ::debug("sem", "released $self->{'pid'}\n"); +} + +sub atomic_link_if_count_less_than { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + ::debug($self->nlinks(), "<", $self->{'count'}); + if($self->nlinks() < $self->{'count'}) { + -d 
$self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + } + $self->unlock(); + ::debug("run", "atomic $retval"); + return $retval; +} + +sub _atomic_link_if_count_less_than { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem", $nlinks, "<", $self->{'count'}); + if($nlinks++ < $self->{'count'}) { + -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + print $fh "#"x$nlinks; + close $fh; + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks { + my $self = shift; + if(-e $self->{'idfile'}) { + ::debug("sem", "nlinks", (stat(_))[3], "size", (stat(_))[7], "\n"); + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! 
=~ m/Function not implemented/) { + ::warning("flock: $!"); + ::warning("Will wait for a random while\n"); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: bail out + ::warning("Semaphore timed out. Ignoring timeout."); + $locked = 3; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout."); + } + } + } + ::debug("run", "locked $self->{'lockfile'}"); +} + +sub unlock { + my $self = shift; + unlink $self->{'lockfile'}; + close $self->{'lockfh'}; + ::debug("run", "unlocked\n"); +} + +sub mkdir_or_die { + # If dir is not writable: die + my $dir = shift; + my @dir_parts = split(m:/:,$dir); + my ($ddir,$part); + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $ddir .= "/".$part; + -d $ddir and next; + mkdir $ddir; + } + if(not -w $dir) { + ::error("Cannot write to $dir: $!\n"); + ::wait_and_exit(255); + } +} + +# Keep perl -w happy +$opt::x = $Semaphore::timeout = $Semaphore::wait = +$Job::file_descriptor_warning_printed = 0; diff --git a/src/rocksdb/build_tools/make_package.sh b/src/rocksdb/build_tools/make_package.sh new file mode 100755 index 00000000..0d86548e --- /dev/null +++ b/src/rocksdb/build_tools/make_package.sh @@ -0,0 +1,133 @@ +# shellcheck disable=SC1113 +#/usr/bin/env bash + +set -e + +function log() { + echo "[+] $1" +} + +function fatal() { + echo "[!] 
$1" + exit 1 +} + +function platform() { + local __resultvar=$1 + if [[ -f "/etc/yum.conf" ]]; then + eval $__resultvar="centos" + elif [[ -f "/etc/dpkg/dpkg.cfg" ]]; then + eval $__resultvar="ubuntu" + else + fatal "Unknwon operating system" + fi +} +platform OS + +function package() { + if [[ $OS = "ubuntu" ]]; then + if dpkg --get-selections | grep --quiet $1; then + log "$1 is already installed. skipping." + else + # shellcheck disable=SC2068 + apt-get install $@ -y + fi + elif [[ $OS = "centos" ]]; then + if rpm -qa | grep --quiet $1; then + log "$1 is already installed. skipping." + else + # shellcheck disable=SC2068 + yum install $@ -y + fi + fi +} + +function detect_fpm_output() { + if [[ $OS = "ubuntu" ]]; then + export FPM_OUTPUT=deb + elif [[ $OS = "centos" ]]; then + export FPM_OUTPUT=rpm + fi +} +detect_fpm_output + +function gem_install() { + if gem list | grep --quiet $1; then + log "$1 is already installed. skipping." + else + # shellcheck disable=SC2068 + gem install $@ + fi +} + +function main() { + if [[ $# -ne 1 ]]; then + fatal "Usage: $0 <rocksdb_version>" + else + log "using rocksdb version: $1" + fi + + if [[ -d /vagrant ]]; then + if [[ $OS = "ubuntu" ]]; then + package g++-4.8 + export CXX=g++-4.8 + + # the deb would depend on libgflags2, but the static lib is the only thing + # installed by make install + package libgflags-dev + + package ruby-all-dev + elif [[ $OS = "centos" ]]; then + pushd /etc/yum.repos.d + if [[ ! -f /etc/yum.repos.d/devtools-1.1.repo ]]; then + wget http://people.centos.org/tru/devtools-1.1/devtools-1.1.repo + fi + package devtoolset-1.1-gcc --enablerepo=testing-1.1-devtools-6 + package devtoolset-1.1-gcc-c++ --enablerepo=testing-1.1-devtools-6 + export CC=/opt/centos/devtoolset-1.1/root/usr/bin/gcc + export CPP=/opt/centos/devtoolset-1.1/root/usr/bin/cpp + export CXX=/opt/centos/devtoolset-1.1/root/usr/bin/c++ + export PATH=$PATH:/opt/centos/devtoolset-1.1/root/usr/bin + popd + if ! 
rpm -qa | grep --quiet gflags; then + rpm -i https://github.com/schuhschuh/gflags/releases/download/v2.1.0/gflags-devel-2.1.0-1.amd64.rpm + fi + + package ruby + package ruby-devel + package rubygems + package rpm-build + fi + fi + gem_install fpm + + make static_lib + make install INSTALL_PATH=package + + cd package + + LIB_DIR=lib + if [[ -z "$ARCH" ]]; then + ARCH=$(getconf LONG_BIT) + fi + if [[ ("$FPM_OUTPUT" = "rpm") && ($ARCH -eq 64) ]]; then + mv lib lib64 + LIB_DIR=lib64 + fi + + fpm \ + -s dir \ + -t $FPM_OUTPUT \ + -n rocksdb \ + -v $1 \ + --prefix /usr \ + --url http://rocksdb.org/ \ + -m rocksdb@fb.com \ + --license BSD \ + --vendor Facebook \ + --description "RocksDB is an embeddable persistent key-value store for fast storage." \ + include $LIB_DIR +} + +# shellcheck disable=SC2068 +main $@ diff --git a/src/rocksdb/build_tools/precommit_checker.py b/src/rocksdb/build_tools/precommit_checker.py new file mode 100755 index 00000000..0f8884df --- /dev/null +++ b/src/rocksdb/build_tools/precommit_checker.py @@ -0,0 +1,208 @@ +#!/usr/local/fbcode/gcc-4.9-glibc-2.20-fb/bin/python2.7 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import argparse +import commands +import subprocess +import sys +import re +import os +import time + + +# +# Simple logger +# + +class Log: + + def __init__(self, filename): + self.filename = filename + self.f = open(self.filename, 'w+', 0) + + def caption(self, str): + line = "\n##### %s #####\n" % str + if self.f: + self.f.write("%s \n" % line) + else: + print(line) + + def error(self, str): + data = "\n\n##### ERROR ##### %s" % str + if self.f: + self.f.write("%s \n" % data) + else: + print(data) + + def log(self, str): + if self.f: + self.f.write("%s \n" % str) + else: + print(str) + +# +# Shell Environment +# + + +class Env(object): + + def __init__(self, logfile, tests): + self.tests = tests + self.log = Log(logfile) + 
+ def shell(self, cmd, path=os.getcwd()): + if path: + os.chdir(path) + + self.log.log("==== shell session ===========================") + self.log.log("%s> %s" % (path, cmd)) + status = subprocess.call("cd %s; %s" % (path, cmd), shell=True, + stdout=self.log.f, stderr=self.log.f) + self.log.log("status = %s" % status) + self.log.log("============================================== \n\n") + return status + + def GetOutput(self, cmd, path=os.getcwd()): + if path: + os.chdir(path) + + self.log.log("==== shell session ===========================") + self.log.log("%s> %s" % (path, cmd)) + status, out = commands.getstatusoutput(cmd) + self.log.log("status = %s" % status) + self.log.log("out = %s" % out) + self.log.log("============================================== \n\n") + return status, out + +# +# Pre-commit checker +# + + +class PreCommitChecker(Env): + + def __init__(self, args): + Env.__init__(self, args.logfile, args.tests) + self.ignore_failure = args.ignore_failure + + # + # Get commands for a given job from the determinator file + # + def get_commands(self, test): + status, out = self.GetOutput( + "RATIO=1 build_tools/rocksdb-lego-determinator %s" % test, ".") + return status, out + + # + # Run a specific CI job + # + def run_test(self, test): + self.log.caption("Running test %s locally" % test) + + # get commands for the CI job determinator + status, cmds = self.get_commands(test) + if status != 0: + self.log.error("Error getting commands for test %s" % test) + return False + + # Parse the JSON to extract the commands to run + cmds = re.findall("'shell':'([^\']*)'", cmds) + + if len(cmds) == 0: + self.log.log("No commands found") + return False + + # Run commands + for cmd in cmds: + # Replace J=<..> with the local environment variable + if "J" in os.environ: + cmd = cmd.replace("J=1", "J=%s" % os.environ["J"]) + cmd = cmd.replace("make ", "make -j%s " % os.environ["J"]) + # Run the command + status = self.shell(cmd, ".") + if status != 0: + 
self.log.error("Error running command %s for test %s" + % (cmd, test)) + return False + + return True + + # + # Run specified CI jobs + # + def run_tests(self): + if not self.tests: + self.log.error("Invalid args. Please provide tests") + return False + + self.print_separator() + self.print_row("TEST", "RESULT") + self.print_separator() + + result = True + for test in self.tests: + start_time = time.time() + self.print_test(test) + result = self.run_test(test) + elapsed_min = (time.time() - start_time) / 60 + if not result: + self.log.error("Error running test %s" % test) + self.print_result("FAIL (%dm)" % elapsed_min) + if not self.ignore_failure: + return False + result = False + else: + self.print_result("PASS (%dm)" % elapsed_min) + + self.print_separator() + return result + + # + # Print a line + # + def print_separator(self): + print("".ljust(60, "-")) + + # + # Print two colums + # + def print_row(self, c0, c1): + print("%s%s" % (c0.ljust(40), c1.ljust(20))) + + def print_test(self, test): + print(test.ljust(40), end="") + sys.stdout.flush() + + def print_result(self, result): + print(result.ljust(20)) + +# +# Main +# +parser = argparse.ArgumentParser(description='RocksDB pre-commit checker.') + +# --log <logfile> +parser.add_argument('--logfile', default='/tmp/precommit-check.log', + help='Log file. Default is /tmp/precommit-check.log') +# --ignore_failure +parser.add_argument('--ignore_failure', action='store_true', default=False, + help='Stop when an error occurs') +# <test ....> +parser.add_argument('tests', nargs='+', + help='CI test(s) to run. e.g: unit punit asan tsan ubsan') + +args = parser.parse_args() +checker = PreCommitChecker(args) + +print("Please follow log %s" % checker.log.filename) + +if not checker.run_tests(): + print("Error running tests. 
Please check log file %s" + % checker.log.filename) + sys.exit(1) + +sys.exit(0) diff --git a/src/rocksdb/build_tools/regression_build_test.sh b/src/rocksdb/build_tools/regression_build_test.sh new file mode 100755 index 00000000..69806332 --- /dev/null +++ b/src/rocksdb/build_tools/regression_build_test.sh @@ -0,0 +1,413 @@ +#!/usr/bin/env bash + +set -e + +NUM=10000000 + +if [ $# -eq 1 ];then + DATA_DIR=$1 +elif [ $# -eq 2 ];then + DATA_DIR=$1 + STAT_FILE=$2 +fi + +# On the production build servers, set data and stat +# files/directories not in /tmp or else the tempdir cleaning +# scripts will make you very unhappy. +DATA_DIR=${DATA_DIR:-$(mktemp -t -d rocksdb_XXXX)} +STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)} + +function cleanup { + rm -rf $DATA_DIR + rm -f $STAT_FILE.fillseq + rm -f $STAT_FILE.readrandom + rm -f $STAT_FILE.overwrite + rm -f $STAT_FILE.memtablefillreadrandom +} + +trap cleanup EXIT + +if [ -z $GIT_BRANCH ]; then + git_br=`git rev-parse --abbrev-ref HEAD` +else + git_br=$(basename $GIT_BRANCH) +fi + +if [ $git_br == "master" ]; then + git_br="" +else + git_br="."$git_br +fi + +make release + +# measure fillseq + fill up the DB for overwrite benchmark +./db_bench \ + --benchmarks=fillseq \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --bloom_bits=10 \ + --num=$NUM \ + --writes=$NUM \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 > ${STAT_FILE}.fillseq + +# measure overwrite performance +./db_bench \ + --benchmarks=overwrite \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$NUM \ + --writes=$((NUM / 10)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=8 > ${STAT_FILE}.overwrite + +# fill up the db for readrandom benchmark (1GB 
total size) +./db_bench \ + --benchmarks=fillseq \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --bloom_bits=10 \ + --num=$NUM \ + --writes=$NUM \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=1 > /dev/null + +# measure readrandom with 6GB block cache +./db_bench \ + --benchmarks=readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$NUM \ + --reads=$((NUM / 5)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > ${STAT_FILE}.readrandom + +# measure readrandom with 6GB block cache and tailing iterator +./db_bench \ + --benchmarks=readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$NUM \ + --reads=$((NUM / 5)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --use_tailing_iterator=1 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > ${STAT_FILE}.readrandomtailing + +# measure readrandom with 100MB block cache +./db_bench \ + --benchmarks=readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$NUM \ + --reads=$((NUM / 5)) \ + --cache_size=104857600 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > ${STAT_FILE}.readrandomsmallblockcache + +# measure readrandom with 8k data in memtable +./db_bench \ + --benchmarks=overwrite,readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$NUM \ + --reads=$((NUM / 5)) \ + --writes=512 \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + 
--write_buffer_size=1000000000 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > ${STAT_FILE}.readrandom_mem_sst + + +# fill up the db for readrandom benchmark with filluniquerandom (1GB total size) +./db_bench \ + --benchmarks=filluniquerandom \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --bloom_bits=10 \ + --num=$((NUM / 4)) \ + --writes=$((NUM / 4)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=1 > /dev/null + +# dummy test just to compact the data +./db_bench \ + --benchmarks=readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$((NUM / 1000)) \ + --reads=$((NUM / 1000)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > /dev/null + +# measure readrandom after load with filluniquerandom with 6GB block cache +./db_bench \ + --benchmarks=readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$((NUM / 4)) \ + --reads=$((NUM / 4)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --disable_auto_compactions=1 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > ${STAT_FILE}.readrandom_filluniquerandom + +# measure readwhilewriting after load with filluniquerandom with 6GB block cache +./db_bench \ + --benchmarks=readwhilewriting \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$((NUM / 4)) \ + --reads=$((NUM / 4)) \ + --benchmark_write_rate_limit=$(( 110 * 1024 )) \ + --write_buffer_size=100000000 \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --statistics=1 \ + 
--histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > ${STAT_FILE}.readwhilewriting + +# measure memtable performance -- none of the data gets flushed to disk +./db_bench \ + --benchmarks=fillrandom,readrandom, \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --num=$((NUM / 10)) \ + --reads=$NUM \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --write_buffer_size=1000000000 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --value_size=10 \ + --threads=16 > ${STAT_FILE}.memtablefillreadrandom + +common_in_mem_args="--db=/dev/shm/rocksdb \ + --num_levels=6 \ + --key_size=20 \ + --prefix_size=12 \ + --keys_per_prefix=10 \ + --value_size=100 \ + --compression_type=none \ + --compression_ratio=1 \ + --hard_rate_limit=2 \ + --write_buffer_size=134217728 \ + --max_write_buffer_number=4 \ + --level0_file_num_compaction_trigger=8 \ + --level0_slowdown_writes_trigger=16 \ + --level0_stop_writes_trigger=24 \ + --target_file_size_base=134217728 \ + --max_bytes_for_level_base=1073741824 \ + --disable_wal=0 \ + --wal_dir=/dev/shm/rocksdb \ + --sync=0 \ + --verify_checksum=1 \ + --delete_obsolete_files_period_micros=314572800 \ + --max_grandparent_overlap_factor=10 \ + --use_plain_table=1 \ + --open_files=-1 \ + --mmap_read=1 \ + --mmap_write=0 \ + --memtablerep=prefix_hash \ + --bloom_bits=10 \ + --bloom_locality=1 \ + --perf_level=0" + +# prepare a in-memory DB with 50M keys, total DB size is ~6G +./db_bench \ + $common_in_mem_args \ + --statistics=0 \ + --max_background_compactions=16 \ + --max_background_flushes=16 \ + --benchmarks=filluniquerandom \ + --use_existing_db=0 \ + --num=52428800 \ + --threads=1 > /dev/null + +# Readwhilewriting +./db_bench \ + $common_in_mem_args \ + --statistics=1 \ + --max_background_compactions=4 \ + --max_background_flushes=0 \ + --benchmarks=readwhilewriting\ + --use_existing_db=1 \ + --duration=600 \ + --threads=32 \ + 
--benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram + +# Seekrandomwhilewriting +./db_bench \ + $common_in_mem_args \ + --statistics=1 \ + --max_background_compactions=4 \ + --max_background_flushes=0 \ + --benchmarks=seekrandomwhilewriting \ + --use_existing_db=1 \ + --use_tailing_iterator=1 \ + --duration=600 \ + --threads=32 \ + --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram + +# measure fillseq with bunch of column families +./db_bench \ + --benchmarks=fillseq \ + --num_column_families=500 \ + --write_buffer_size=1048576 \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --num=$NUM \ + --writes=$NUM \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 > ${STAT_FILE}.fillseq_lots_column_families + +# measure overwrite performance with bunch of column families +./db_bench \ + --benchmarks=overwrite \ + --num_column_families=500 \ + --write_buffer_size=1048576 \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --num=$NUM \ + --writes=$((NUM / 10)) \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=8 > ${STAT_FILE}.overwrite_lots_column_families + +# send data to ods +function send_to_ods { + key="$1" + value="$2" + + if [ -z $JENKINS_HOME ]; then + # running on devbox, just print out the values + echo $1 $2 + return + fi + + if [ -z "$value" ];then + echo >&2 "ERROR: Key $key doesn't have a value." 
+ return + fi + curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \ + --connect-timeout 60 +} + +function send_benchmark_to_ods { + bench="$1" + bench_key="$2" + file="$3" + + QPS=$(grep $bench $file | awk '{print $5}') + P50_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $3}' ) + P75_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $5}' ) + P99_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $7}' ) + + send_to_ods rocksdb.build.$bench_key.qps $QPS + send_to_ods rocksdb.build.$bench_key.p50_micros $P50_MICROS + send_to_ods rocksdb.build.$bench_key.p75_micros $P75_MICROS + send_to_ods rocksdb.build.$bench_key.p99_micros $P99_MICROS +} + +send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite +send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq +send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom +send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing +send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache +send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst +send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom +send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom +send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom +send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting +send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram +send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram +send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families +send_benchmark_to_ods overwrite overwrite_lots_column_families 
${STAT_FILE}.overwrite_lots_column_families diff --git a/src/rocksdb/build_tools/rocksdb-lego-determinator b/src/rocksdb/build_tools/rocksdb-lego-determinator new file mode 100755 index 00000000..2447a19a --- /dev/null +++ b/src/rocksdb/build_tools/rocksdb-lego-determinator @@ -0,0 +1,911 @@ +#!/usr/bin/env bash +# This script is executed by Sandcastle +# to determine next steps to run + +# Usage: +# EMAIL=<email> ONCALL=<email> TRIGGER=<trigger> SUBSCRIBER=<email> rocks_ci.py <test-name> +# +# Input Value +# ------------------------------------------------------------------------- +# EMAIL Email address to report on trigger conditions +# ONCALL Email address to raise a task on failure +# TRIGGER Trigger conditions for email. Valid values are fail, warn, all +# SUBSCRIBER Email addresss to add as subscriber for task +# + +# +# Report configuration +# +REPORT_EMAIL= +if [ ! -z $EMAIL ]; then + if [ -z $TRIGGER ]; then + TRIGGER="fail" + fi + + REPORT_EMAIL=" + { + 'type':'email', + 'triggers': [ '$TRIGGER' ], + 'emails':['$EMAIL'] + }," +fi + +CREATE_TASK= +if [ ! -z $ONCALL ]; then + CREATE_TASK=" + { + 'type':'task', + 'triggers':[ 'fail' ], + 'priority':0, + 'subscribers':[ '$SUBSCRIBER' ], + 'tags':[ 'rocksdb', 'ci' ], + }," +fi + +# For now, create the tasks using only the dedicated task creation tool. +CREATE_TASK= + +REPORT= +if [[ ! -z $REPORT_EMAIL || ! -z $CREATE_TASK ]]; then + REPORT="'report': [ + $REPORT_EMAIL + $CREATE_TASK + ]" +fi + +# +# Helper variables +# +CLEANUP_ENV=" +{ + 'name':'Cleanup environment', + 'shell':'rm -rf /dev/shm/rocksdb && mkdir /dev/shm/rocksdb && (chmod +t /dev/shm || true) && make clean', + 'user':'root' +}" + +# We will eventually set the RATIO to 1, but we want do this +# in steps. 
RATIO=$(nproc) will make it work as J=1 +if [ -z $RATIO ]; then + RATIO=$(nproc) +fi + +if [ -z $PARALLEL_J ]; then + PARALLEL_J="J=$(expr $(nproc) / ${RATIO})" +fi + +if [ -z $PARALLEL_j ]; then + PARALLEL_j="-j$(expr $(nproc) / ${RATIO})" +fi + +PARALLELISM="$PARALLEL_J $PARALLEL_j" + +DEBUG="OPT=-g" +SHM="TEST_TMPDIR=/dev/shm/rocksdb" +NON_SHM="TMPD=/tmp/rocksdb_test_tmp" +GCC_481="ROCKSDB_FBCODE_BUILD_WITH_481=1" +ASAN="COMPILE_WITH_ASAN=1" +CLANG="USE_CLANG=1" +# in gcc-5 there are known problems with TSAN like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71090. +# using platform007 gives us gcc-8 or higher which has that bug fixed. +TSAN="ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007=1 COMPILE_WITH_TSAN=1" +UBSAN="COMPILE_WITH_UBSAN=1" +TSAN_CRASH='CRASH_TEST_EXT_ARGS="--compression_type=zstd --log2_keys_per_lock=22"' +NON_TSAN_CRASH="CRASH_TEST_EXT_ARGS=--compression_type=zstd" +DISABLE_JEMALLOC="DISABLE_JEMALLOC=1" +HTTP_PROXY="https_proxy=http://fwdproxy.29.prn1:8080 http_proxy=http://fwdproxy.29.prn1:8080 ftp_proxy=http://fwdproxy.29.prn1:8080" +SETUP_JAVA_ENV="export $HTTP_PROXY; export JAVA_HOME=/usr/local/jdk-8u60-64/; export PATH=\$JAVA_HOME/bin:\$PATH" +PARSER="'parser':'python build_tools/error_filter.py $1'" + +CONTRUN_NAME="ROCKSDB_CONTRUN_NAME" + +# This code is getting called under various scenarios. What we care about is to +# understand when it's called from nightly contruns because in that case we'll +# create tasks for any failures. To follow the existing pattern, we'll check +# the value of $ONCALL. If it's a diff then just call `false` to make sure +# that errors will be properly propagated to the caller. +if [ ! 
-z $ONCALL ]; then + TASK_CREATION_TOOL="/usr/local/bin/mysql_mtr_filter --rocksdb --oncall $ONCALL" +else + TASK_CREATION_TOOL="false" +fi + +ARTIFACTS=" 'artifacts': [ + { + 'name':'database', + 'paths':[ '/dev/shm/rocksdb' ], + } +]" + +# +# A mechanism to disable tests temporarily +# +DISABLE_COMMANDS="[ + { + 'name':'Disable test', + 'oncall':'$ONCALL', + 'steps': [ + { + 'name':'Job disabled. Please contact test owner', + 'shell':'exit 1', + 'user':'root' + }, + ], + } +]" + +# +# RocksDB unit test +# +UNIT_TEST_COMMANDS="[ + { + 'name':'Rocksdb Unit Test', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and test RocksDB debug version', + 'shell':'$SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB unit test not under /dev/shm +# +UNIT_TEST_NON_SHM_COMMANDS="[ + { + 'name':'Rocksdb Unit Test', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and test RocksDB debug version', + 'timeout': 86400, + 'shell':'$NON_SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=non_shm_check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB release build and unit tests +# +RELEASE_BUILD_COMMANDS="[ + { + 'name':'Rocksdb Release Build', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build RocksDB release', + 'shell':'make $PARALLEL_j release || $CONTRUN_NAME=release $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB unit test on gcc-4.8.1 +# +UNIT_TEST_COMMANDS_481="[ + { + 'name':'Rocksdb Unit Test on GCC 4.8.1', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and test RocksDB debug version', + 'shell':'$SHM $GCC_481 $DEBUG make $PARALLELISM check || $CONTRUN_NAME=unit_gcc_481_check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB release build and 
unit tests +# +RELEASE_BUILD_COMMANDS_481="[ + { + 'name':'Rocksdb Release on GCC 4.8.1', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build RocksDB release on GCC 4.8.1', + 'shell':'$GCC_481 make $PARALLEL_j release || $CONTRUN_NAME=release_gcc481 $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB unit test with CLANG +# +CLANG_UNIT_TEST_COMMANDS="[ + { + 'name':'Rocksdb Unit Test', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and test RocksDB debug', + 'shell':'$CLANG $SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=clang_check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB release build with CLANG +# +CLANG_RELEASE_BUILD_COMMANDS="[ + { + 'name':'Rocksdb CLANG Release Build', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build RocksDB release', + 'shell':'$CLANG make $PARALLEL_j release|| $CONTRUN_NAME=clang_release $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB analyze +# +CLANG_ANALYZE_COMMANDS="[ + { + 'name':'Rocksdb analyze', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'RocksDB build and analyze', + 'shell':'$CLANG $SHM $DEBUG make $PARALLEL_j analyze || $CONTRUN_NAME=clang_analyze $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB code coverage +# +CODE_COV_COMMANDS="[ + { + 'name':'Rocksdb Unit Test Code Coverage', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build, test and collect code coverage info', + 'shell':'$SHM $DEBUG make $PARALLELISM coverage || $CONTRUN_NAME=coverage $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB unity +# +UNITY_COMMANDS="[ + { + 'name':'Rocksdb Unity', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build, test unity test', + 'shell':'$SHM $DEBUG V=1 make J=1 unity_test || 
$CONTRUN_NAME=unity_test $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# Build RocksDB lite +# +LITE_BUILD_COMMANDS="[ + { + 'name':'Rocksdb Lite build', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build RocksDB debug version', + 'shell':'make J=1 LITE=1 all check || $CONTRUN_NAME=lite $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# Report RocksDB lite binary size to scuba +REPORT_LITE_BINARY_SIZE_COMMANDS="[ + { + 'name':'Rocksdb Lite Binary Size', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Report RocksDB Lite binary size to scuba', + 'shell':'tools/report_lite_binary_size.sh', + 'user':'root', + }, + ], + } +]" + +# +# RocksDB stress/crash test +# +STRESS_CRASH_TEST_COMMANDS="[ + { + 'name':'Rocksdb Stress/Crash Test', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB debug stress tests', + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + { + 'name':'Build and run RocksDB debug crash tests', + 'timeout': 86400, + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test || $CONTRUN_NAME=crash_test $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + } + ], + $ARTIFACTS, + $REPORT + } +]" + +# +# RocksDB stress/crash test with atomic flush +# +STRESS_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ + { + 'name':'Rocksdb Stress/Crash Test (atomic flush)', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB debug stress tests', + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + { + 'name':'Build and run RocksDB debug crash tests with atomic flush', + 'timeout': 86400, + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test_with_atomic_flush ||
$CONTRUN_NAME=crash_test_with_atomic_flush $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + } + ], + $ARTIFACTS, + $REPORT + } +]" + +# RocksDB write stress test. +# We run on disk device on purpose (i.e. no $SHM) +# because we want to add some randomness to fsync commands +WRITE_STRESS_COMMANDS="[ + { + 'name':'Rocksdb Write Stress Test', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB write stress tests', + 'shell':'make write_stress && python tools/write_stress_runner.py --runtime_sec=3600 --db=/tmp/rocksdb_write_stress || $CONTRUN_NAME=write_stress $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + } + ], + 'artifacts': [{'name': 'database', 'paths': ['/tmp/rocksdb_write_stress']}], + $REPORT + } +]" + + +# +# RocksDB test under address sanitizer +# +ASAN_TEST_COMMANDS="[ + { + 'name':'Rocksdb Unit Test under ASAN', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Test RocksDB debug under ASAN', +'shell':'set -o pipefail && ($SHM $ASAN $DEBUG make $PARALLELISM asan_check || $CONTRUN_NAME=asan_check $TASK_CREATION_TOOL) |& /usr/facebook/ops/scripts/asan_symbolize.py -d', + 'user':'root', + $PARSER + } + ], + $REPORT + } +]" + +# +# RocksDB crash testing under address sanitizer +# +ASAN_CRASH_TEST_COMMANDS="[ + { + 'name':'Rocksdb crash test under ASAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB debug asan_crash_test', + 'timeout': 86400, + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test || $CONTRUN_NAME=asan_crash_test $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB crash testing with atomic flush under address sanitizer +# +ASAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ + { + 'name':'Rocksdb crash test (atomic flush) under ASAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB debug asan_crash_test_with_atomic_flush', + 
'timeout': 86400, + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test_with_atomic_flush || $CONTRUN_NAME=asan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB test under undefined behavior sanitizer +# +UBSAN_TEST_COMMANDS="[ + { + 'name':'Rocksdb Unit Test under UBSAN', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Test RocksDB debug under UBSAN', + 'shell':'set -o pipefail && $SHM $UBSAN $DEBUG make $PARALLELISM ubsan_check || $CONTRUN_NAME=ubsan_check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + } + ], + $REPORT + } +]" + +# +# RocksDB crash testing under undefined behavior sanitizer +# +UBSAN_CRASH_TEST_COMMANDS="[ + { + 'name':'Rocksdb crash test under UBSAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB debug ubsan_crash_test', + 'timeout': 86400, + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 ubsan_crash_test || $CONTRUN_NAME=ubsan_crash_test $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB crash testing with atomic flush under undefined behavior sanitizer +# +UBSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ + { + 'name':'Rocksdb crash test (atomic flush) under UBSAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build and run RocksDB debug ubsan_crash_test_with_atomic_flush', + 'timeout': 86400, + 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 ubsan_crash_test_with_atomic_flush || $CONTRUN_NAME=ubsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB unit test under valgrind +# +VALGRIND_TEST_COMMANDS="[ + { + 'name':'Rocksdb Unit Test under valgrind', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Run RocksDB debug unit tests', + 'timeout': 86400, + 'shell':'$SHM $DEBUG make $PARALLELISM
valgrind_test || $CONTRUN_NAME=valgrind_check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB test under TSAN +# +TSAN_UNIT_TEST_COMMANDS="[ + { + 'name':'Rocksdb Unit Test under TSAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Run RocksDB debug unit test', + 'timeout': 86400, + 'shell':'set -o pipefail && $SHM $DEBUG $TSAN make $PARALLELISM check || $CONTRUN_NAME=tsan_check $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB crash test under TSAN +# +TSAN_CRASH_TEST_COMMANDS="[ + { + 'name':'Rocksdb Crash Test under TSAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Compile and run', + 'timeout': 86400, + 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test || $CONTRUN_NAME=tsan_crash_test $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB crash test with atomic flush under TSAN +# +TSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ + { + 'name':'Rocksdb Crash Test with atomic flush under TSAN', + 'oncall':'$ONCALL', + 'timeout': 86400, + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Compile and run', + 'timeout': 86400, + 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test_with_atomic_flush || $CONTRUN_NAME=tsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB format compatible +# + +run_format_compatible() +{ + export TEST_TMPDIR=/dev/shm/rocksdb + rm -rf /dev/shm/rocksdb + mkdir /dev/shm/rocksdb + + tools/check_format_compatible.sh +} + +FORMAT_COMPATIBLE_COMMANDS="[ + { + 'name':'Rocksdb Format Compatible tests', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Run RocksDB debug unit test', + 'shell':'build_tools/rocksdb-lego-determinator run_format_compatible || 
$CONTRUN_NAME=run_format_compatible $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB no compression +# +run_no_compression() +{ + export TEST_TMPDIR=/dev/shm/rocksdb + rm -rf /dev/shm/rocksdb + mkdir /dev/shm/rocksdb + make clean + cat build_tools/fbcode_config.sh | grep -iv dzstd | grep -iv dzlib | grep -iv dlz4 | grep -iv dsnappy | grep -iv dbzip2 > .tmp.fbcode_config.sh + mv .tmp.fbcode_config.sh build_tools/fbcode_config.sh + cat Makefile | grep -v tools/ldb_test.py > .tmp.Makefile + mv .tmp.Makefile Makefile + make $DEBUG J=1 check +} + +NO_COMPRESSION_COMMANDS="[ + { + 'name':'Rocksdb No Compression tests', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Run RocksDB debug unit test', + 'shell':'build_tools/rocksdb-lego-determinator run_no_compression || $CONTRUN_NAME=run_no_compression $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB regression +# +run_regression() +{ + time -v bash -vx ./build_tools/regression_build_test.sh $(mktemp -d $WORKSPACE/leveldb.XXXX) $(mktemp leveldb_test_stats.XXXX) + + # ======= report size to ODS ======== + + # parameters: $1 -- key, $2 -- value + function send_size_to_ods { + curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=rocksdb.build_size.$1&value=$2" \ + --connect-timeout 60 + } + + # === normal build === + make clean + make -j$(nproc) static_lib + send_size_to_ods static_lib $(stat --printf="%s" librocksdb.a) + strip librocksdb.a + send_size_to_ods static_lib_stripped $(stat --printf="%s" librocksdb.a) + + make -j$(nproc) shared_lib + send_size_to_ods shared_lib $(stat --printf="%s" `readlink -f librocksdb.so`) + strip `readlink -f librocksdb.so` + send_size_to_ods shared_lib_stripped $(stat --printf="%s" `readlink -f librocksdb.so`) + + # === lite build === + make clean + make LITE=1 -j$(nproc) static_lib + send_size_to_ods static_lib_lite $(stat --printf="%s" librocksdb.a) + 
strip librocksdb.a + send_size_to_ods static_lib_lite_stripped $(stat --printf="%s" librocksdb.a) + + make LITE=1 -j$(nproc) shared_lib + send_size_to_ods shared_lib_lite $(stat --printf="%s" `readlink -f librocksdb.so`) + strip `readlink -f librocksdb.so` + send_size_to_ods shared_lib_lite_stripped $(stat --printf="%s" `readlink -f librocksdb.so`) +} + +REGRESSION_COMMANDS="[ + { + 'name':'Rocksdb regression commands', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Make and run script', + 'shell':'build_tools/rocksdb-lego-determinator run_regression || $CONTRUN_NAME=run_regression $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + +# +# RocksDB Java build +# +JAVA_BUILD_TEST_COMMANDS="[ + { + 'name':'Rocksdb Java Build', + 'oncall':'$ONCALL', + 'steps': [ + $CLEANUP_ENV, + { + 'name':'Build RocksDB for Java', + 'shell':'$SETUP_JAVA_ENV; $SHM make rocksdbjava || $CONTRUN_NAME=rocksdbjava $TASK_CREATION_TOOL', + 'user':'root', + $PARSER + }, + ], + $REPORT + } +]" + + +case $1 in + unit) + echo $UNIT_TEST_COMMANDS + ;; + unit_non_shm) + echo $UNIT_TEST_NON_SHM_COMMANDS + ;; + release) + echo $RELEASE_BUILD_COMMANDS + ;; + unit_481) + echo $UNIT_TEST_COMMANDS_481 + ;; + release_481) + echo $RELEASE_BUILD_COMMANDS_481 + ;; + clang_unit) + echo $CLANG_UNIT_TEST_COMMANDS + ;; + clang_release) + echo $CLANG_RELEASE_BUILD_COMMANDS + ;; + clang_analyze) + echo $CLANG_ANALYZE_COMMANDS + ;; + code_cov) + echo $CODE_COV_COMMANDS + ;; + unity) + echo $UNITY_COMMANDS + ;; + lite) + echo $LITE_BUILD_COMMANDS + ;; + report_lite_binary_size) + echo $REPORT_LITE_BINARY_SIZE_COMMANDS + ;; + stress_crash) + echo $STRESS_CRASH_TEST_COMMANDS + ;; + stress_crash_with_atomic_flush) + echo $STRESS_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS + ;; + write_stress) + echo $WRITE_STRESS_COMMANDS + ;; + asan) + echo $ASAN_TEST_COMMANDS + ;; + asan_crash) + echo $ASAN_CRASH_TEST_COMMANDS + ;; + asan_crash_with_atomic_flush) + echo 
$ASAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS + ;; + ubsan) + echo $UBSAN_TEST_COMMANDS + ;; + ubsan_crash) + echo $UBSAN_CRASH_TEST_COMMANDS + ;; + ubsan_crash_with_atomic_flush) + echo $UBSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS + ;; + valgrind) + echo $VALGRIND_TEST_COMMANDS + ;; + tsan) + echo $TSAN_UNIT_TEST_COMMANDS + ;; + tsan_crash) + echo $TSAN_CRASH_TEST_COMMANDS + ;; + tsan_crash_with_atomic_flush) + echo $TSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS + ;; + format_compatible) + echo $FORMAT_COMPATIBLE_COMMANDS + ;; + run_format_compatible) + run_format_compatible + ;; + no_compression) + echo $NO_COMPRESSION_COMMANDS + ;; + run_no_compression) + run_no_compression + ;; + regression) + echo $REGRESSION_COMMANDS + ;; + run_regression) + run_regression + ;; + java_build) + echo $JAVA_BUILD_TEST_COMMANDS + ;; + *) + echo "Invalid determinator command" + ;; +esac diff --git a/src/rocksdb/build_tools/run_ci_db_test.ps1 b/src/rocksdb/build_tools/run_ci_db_test.ps1 new file mode 100644 index 00000000..0f8198b4 --- /dev/null +++ b/src/rocksdb/build_tools/run_ci_db_test.ps1 @@ -0,0 +1,486 @@ +# This script enables you to run RocksDB tests by running +# All the tests concurrently and utilizing all the cores +Param( + [switch]$EnableJE = $false, # Look for and use test executable, append _je to listed exclusions + [switch]$RunAll = $false, # Will attempt to discover all *_test[_je].exe binaries and run all + # of them as Google suites. I.e. It will run test cases concurrently + # except those mentioned as $Run, those will run as individual test cases + # And any excluded with $ExcludeExes or $ExcludeCases + # It will also not run any individual test cases + # excluded by $ExcludeCases + [switch]$RunAllExe = $false, # Look for and use test executables, append _je to exclusions automatically + # It will attempt to run them in parallel w/o breaking them up on individual + # test cases.
Those listed with $ExcludeExes will be excluded + [string]$SuiteRun = "", # Split test suites in test cases and run in parallel, not compatible with $RunAll + [string]$Run = "", # Run specified executables in parallel but do not split to test cases + [string]$ExcludeCases = "", # Exclude test cases, expects a comma separated list, no spaces + # Takes effect when $RunAll or $SuiteRun is specified. Must have full + # Test cases name including a group and a parameter if any + [string]$ExcludeExes = "", # Exclude exes from consideration, expects a comma separated list, + # no spaces. Takes effect only when $RunAll or $RunAllExe is specified + [string]$WorkFolder = "", # Direct tests to use that folder. SSD or Ram drive are better options. + # Number of async tasks that would run concurrently. Recommend a number below 64. + # However, CPU utilization really depends on the storage media. Recommend ram based disk. + # a value of 1 will run everything serially + [int]$Concurrency = 8, + [int]$Limit = -1 # -1 means do not limit for test purposes +) + +# Folders and commands must be fullpath to run assuming +# the current folder is at the root of the git enlistment +$StartDate = (Get-Date) +$StartDate + + +$DebugPreference = "Continue" + +# These tests are not google test suites and we should guard +# Against running them as suites +$RunOnly = New-Object System.Collections.Generic.HashSet[string] +$RunOnly.Add("c_test") | Out-Null +$RunOnly.Add("compact_on_deletion_collector_test") | Out-Null +$RunOnly.Add("merge_test") | Out-Null +$RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written +$RunOnly.Add("backupable_db_test") | Out-Null # Disabled +$RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest + +if($RunAll -and $SuiteRun -ne "") { + Write-Error '$RunAll and $SuiteRun are not compatible' # single-quoted: name the parameters instead of expanding their values + exit 1 +} + +if($RunAllExe -and $Run -ne "") { + Write-Error '$RunAllExe and $Run are not compatible' # single-quoted: name the parameters instead of expanding their values + exit 1 +} + +# If running under Appveyor assume that root
+[string]$Appveyor = $Env:APPVEYOR_BUILD_FOLDER +if($Appveyor -ne "") { + $RootFolder = $Appveyor +} else { + $RootFolder = $PSScriptRoot -replace '\\build_tools', '' +} + +$LogFolder = -Join($RootFolder, "\db_logs\") +$BinariesFolder = -Join($RootFolder, "\build\Debug\") + +if($WorkFolder -eq "") { + + # If TEST_TMPDIR is set use it + [string]$var = $Env:TEST_TMPDIR + if($var -eq "") { + $WorkFolder = -Join($RootFolder, "\db_tests\") + $Env:TEST_TMPDIR = $WorkFolder + } else { + $WorkFolder = $var + } +} else { +# Override from a command line + $Env:TEST_TMPDIR = $WorkFolder +} + +Write-Output "Root: $RootFolder, WorkFolder: $WorkFolder" +Write-Output "BinariesFolder: $BinariesFolder, LogFolder: $LogFolder" + +# Create test directories in the current folder +md -Path $WorkFolder -ErrorAction Ignore | Out-Null +md -Path $LogFolder -ErrorAction Ignore | Out-Null + + +$ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string] +if($ExcludeCases -ne "") { + Write-Host "ExcludeCases: $ExcludeCases" + $l = $ExcludeCases -split ' ' + ForEach($t in $l) { + $ExcludeCasesSet.Add($t) | Out-Null + } +} + +$ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string] +if($ExcludeExes -ne "") { + Write-Host "ExcludeExe: $ExcludeExes" + $l = $ExcludeExes -split ' ' + ForEach($t in $l) { + $ExcludeExesSet.Add($t) | Out-Null + } +} + + +# Extract the names of its tests by running db_test with --gtest_list_tests. +# This filter removes the "#"-introduced comments, and expands to +# fully-qualified names by changing input like this: +# +# DBTest. +# Empty +# WriteEmptyBatch +# MultiThreaded/MultiThreadedDBTest. 
+# MultiThreaded/0 # GetParam() = 0 +# MultiThreaded/1 # GetParam() = 1 +# +# into this: +# +# DBTest.Empty +# DBTest.WriteEmptyBatch +# MultiThreaded/MultiThreadedDBTest.MultiThreaded/0 +# MultiThreaded/MultiThreadedDBTest.MultiThreaded/1 +# +# Output into the parameter in a form TestName -> Log File Name +function ExtractTestCases([string]$GTestExe, $HashTable) { + + $Tests = @() +# Run db_test to get a list of tests and store it into the $Tests array + &$GTestExe --gtest_list_tests | tee -Variable Tests | Out-Null + + # Current group + $Group="" + + ForEach( $l in $Tests) { + + # Leading whitespace is fine + $l = $l -replace '^\s+','' + # Trailing dot is a test group but no whitespace + if ($l -match "\.$" -and $l -notmatch "\s+") { + $Group = $l + } else { + # Otherwise it is a test name, remove leading space + $test = $l + # remove trailing comment if any and create a log name + $test = $test -replace '\s+\#.*','' + $test = "$Group$test" + + if($ExcludeCasesSet.Contains($test)) { + Write-Warning "$test case is excluded" + continue + } + + $test_log = $test -replace '[\./]','_' + $test_log += ".log" + $log_path = -join ($LogFolder, $test_log) + + # Add to a hashtable + $HashTable.Add($test, $log_path); + } + } +} + +# The function removes trailing .exe suffix if any, +# creates a name for the log file +# Then adds the test name (exclusions are handled by callers) into +# a HashTable in a form of test_name -> log_path +function MakeAndAdd([string]$token, $HashTable) { + + $test_name = $token -replace '\.exe$', '' # escape the dot so only a literal ".exe" suffix is stripped + $log_name = -join ($test_name, ".log") + $log_path = -join ($LogFolder, $log_name) + $HashTable.Add($test_name, $log_path) +} + +# This function takes a list of Suites to run +# Lists all the test cases in each of the suite +# and populates HashOfHashes +# Ordered by suite(exe) @{ Exe = @{ TestCase = LogName }} +function ProcessSuites($ListOfSuites, $HashOfHashes) { + + $suite_list = $ListOfSuites + # Problem: if you run --gtest_list_tests on + # a non Google Test
executable then it will start executing + # and we will get nowhere + ForEach($suite in $suite_list) { + + if($RunOnly.Contains($suite)) { + Write-Warning "$suite is excluded from running as Google test suite" + continue + } + + if($EnableJE) { + $suite += "_je" + } + + $Cases = [ordered]@{} + $Cases.Clear() + $suite_exe = -Join ($BinariesFolder, $suite) + ExtractTestCases -GTestExe $suite_exe -HashTable $Cases + if($Cases.Count -gt 0) { + $HashOfHashes.Add($suite, $Cases); + } + } + + # Fail if no test cases were extracted into the table passed by the caller + if($HashOfHashes.Count -lt 1) { + Write-Error "Failed to extract tests from $ListOfSuites" + exit 1 + } + +} + +# This will contain all test executables to run + +# Hash table that contains all non suite +# Test executable to run +$TestExes = [ordered]@{} + +# Check for test exe that are not +# Google Test Suites +# Since this is explicitly mentioned it is not subject +# for exclusions +if($Run -ne "") { + + $test_list = $Run -split ' ' + ForEach($t in $test_list) { + + if($EnableJE) { + $t += "_je" + } + MakeAndAdd -token $t -HashTable $TestExes + } + + if($TestExes.Count -lt 1) { + Write-Error "Failed to extract tests from $Run" + exit 1 + } +} elseif($RunAllExe) { + # Discover all the test binaries + if($EnableJE) { + $pattern = "*_test_je.exe" + } else { + $pattern = "*_test.exe" + } + + $search_path = -join ($BinariesFolder, $pattern) + Write-Host "Binaries Search Path: $search_path" + + $DiscoveredExe = @() + dir -Path $search_path | ForEach-Object { + $DiscoveredExe += ($_.Name) + } + + # Remove exclusions + ForEach($e in $DiscoveredExe) { + $e = $e -replace '.exe$', '' + $bare_name = $e -replace '_je$', '' + + if($ExcludeExesSet.Contains($bare_name)) { + Write-Warning "Test $e is excluded" + continue + } + MakeAndAdd -token $e -HashTable $TestExes + } + + if($TestExes.Count -lt 1) { + Write-Error "Failed to discover test executables" + exit 1 + } +} + +# Ordered by exe @{ Exe = @{ TestCase = LogName }} +$CasesToRun = [ordered]@{} + +if($SuiteRun -ne "") { +
$suite_list = $SuiteRun -split ' ' + ProcessSuites -ListOfSuites $suite_list -HashOfHashes $CasesToRun +} elseif ($RunAll) { +# Discover all the test binaries + if($EnableJE) { + $pattern = "*_test_je.exe" + } else { + $pattern = "*_test.exe" + } + + $search_path = -join ($BinariesFolder, $pattern) + Write-Host "Binaries Search Path: $search_path" + + $ListOfExe = @() + dir -Path $search_path | ForEach-Object { + $ListOfExe += ($_.Name) + } + + # Exclude those in RunOnly from running as suites + $ListOfSuites = @() + ForEach($e in $ListOfExe) { + + $e = $e -replace '.exe$', '' + $bare_name = $e -replace '_je$', '' + + if($ExcludeExesSet.Contains($bare_name)) { + Write-Warning "Test $e is excluded" + continue + } + + if($RunOnly.Contains($bare_name)) { + MakeAndAdd -token $e -HashTable $TestExes + } else { + $ListOfSuites += $bare_name + } + } + + ProcessSuites -ListOfSuites $ListOfSuites -HashOfHashes $CasesToRun +} + + +# Invoke a test with a filter and redirect all output +$InvokeTestCase = { + param($exe, $test, $log); + &$exe --gtest_filter=$test > $log 2>&1 +} + +# Invoke all tests and redirect output +$InvokeTestAsync = { + param($exe, $log) + &$exe > $log 2>&1 +} + +# Hash that contains tests to rerun if any failed +# Those tests will be rerun sequentially +# $Rerun = [ordered]@{} +# Test limiting factor here +[int]$count = 0 +# Overall status +[bool]$script:success = $true; + +function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal) +{ + # Array to wait for any of the running jobs + $jobs = @() + # Hash JobToLog + $JobToLog = @{} + + # Wait for all to finish and get the results + while(($JobToLog.Count -gt 0) -or + ($TestCmds.Count -gt 0) -or + ($Suites.Count -gt 0)) { + + # Make sure we have maximum concurrent jobs running if anything + # and the $Limit either not set or allows to proceed + while(($JobToLog.Count -lt $ConcurrencyVal) -and + ((($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) -and + (($Limit -lt 0) -or ($count -lt $Limit)))) { + + # We 
always favor suites to run if available + [string]$exe_name = "" + [string]$log_path = "" + $Cases = @{} + + if($Suites.Count -gt 0) { + # Will take the first one + ForEach($e in $Suites.Keys) { + $exe_name = $e + $Cases = $Suites[$e] + break + } + [string]$test_case = "" + [string]$log_path = "" + ForEach($c in $Cases.Keys) { + $test_case = $c + $log_path = $Cases[$c] + break + } + + Write-Host "Starting $exe_name::$test_case" + [string]$Exe = -Join ($BinariesFolder, $exe_name) + $job = Start-Job -Name "$exe_name::$test_case" -ArgumentList @($Exe,$test_case,$log_path) -ScriptBlock $InvokeTestCase + $JobToLog.Add($job, $log_path) + + $Cases.Remove($test_case) + if($Cases.Count -lt 1) { + $Suites.Remove($exe_name) + } + + } elseif ($TestCmds.Count -gt 0) { + + ForEach($e in $TestCmds.Keys) { + $exe_name = $e + $log_path = $TestCmds[$e] + break + } + + Write-Host "Starting $exe_name" + [string]$Exe = -Join ($BinariesFolder, $exe_name) + $job = Start-Job -Name $exe_name -ScriptBlock $InvokeTestAsync -ArgumentList @($Exe,$log_path) + $JobToLog.Add($job, $log_path) + + $TestCmds.Remove($exe_name) + + } else { + Write-Error "In the job loop but nothing to run" + exit 1 + } + + ++$count + } # End of Job starting loop + + if($JobToLog.Count -lt 1) { + break + } + + $jobs = @() + foreach($k in $JobToLog.Keys) { $jobs += $k } + + $completed = Wait-Job -Job $jobs -Any + $log = $JobToLog[$completed] + $JobToLog.Remove($completed) + + $message = -join @($completed.Name, " State: ", ($completed.State)) + + $log_content = @(Get-Content $log) + + if($completed.State -ne "Completed") { + $script:success = $false + Write-Warning $message + $log_content | Write-Warning + } else { + # Scan the log.
If we find PASSED and no occurrence of FAILED + # then it is a success + [bool]$pass_found = $false + ForEach($l in $log_content) { + + if(($l -match "^\[\s+FAILED") -or + ($l -match "Assertion failed:")) { + $pass_found = $false + break + } + + if(($l -match "^\[\s+PASSED") -or + ($l -match " : PASSED$") -or + ($l -match "^PASS$") -or # Special c_test case + ($l -match "Passed all tests!") ) { + $pass_found = $true + } + } + + if(!$pass_found) { + $script:success = $false; + Write-Warning $message + $log_content | Write-Warning + } else { + Write-Host $message + } + } + + # Remove cached job info from the system + # Should be no output + Receive-Job -Job $completed | Out-Null + } +} + +RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency + +$EndDate = (Get-Date) + +New-TimeSpan -Start $StartDate -End $EndDate | + ForEach-Object { + "Elapsed time: {0:g}" -f $_ + } + + +if(!$script:success) { +# This does not succeed killing off jobs quick +# So we simply exit +# Remove-Job -Job $jobs -Force +# indicate failure using this exit code + exit 1 + } + + exit 0 + + diff --git a/src/rocksdb/build_tools/setup_centos7.sh b/src/rocksdb/build_tools/setup_centos7.sh new file mode 100755 index 00000000..c633131d --- /dev/null +++ b/src/rocksdb/build_tools/setup_centos7.sh @@ -0,0 +1,43 @@ +#!/bin/bash +set -e + +ROCKSDB_VERSION="5.10.3" +ZSTD_VERSION="1.1.3" + +echo "This script configures CentOS with everything needed to build and run RocksDB" + +yum update -y && yum install epel-release -y + +yum install -y \ + wget \ + gcc-c++ \ + snappy snappy-devel \ + zlib zlib-devel \ + bzip2 bzip2-devel \ + lz4-devel \ + libasan \ + gflags + +mkdir -pv /usr/local/rocksdb-${ROCKSDB_VERSION} +ln -sfT /usr/local/rocksdb-${ROCKSDB_VERSION} /usr/local/rocksdb + +wget -qO /tmp/zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz +wget -qO /tmp/rocksdb-${ROCKSDB_VERSION}.tar.gz 
https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz + +cd /tmp + +tar xzvf zstd-${ZSTD_VERSION}.tar.gz +tar xzvf rocksdb-${ROCKSDB_VERSION}.tar.gz -C /usr/local/ + +echo "Installing ZSTD..." +pushd zstd-${ZSTD_VERSION} +make && make install +popd + +echo "Compiling RocksDB..." +cd /usr/local/rocksdb +chown -R vagrant:vagrant /usr/local/rocksdb/ +sudo -u vagrant make static_lib +cd examples/ +sudo -u vagrant make all +sudo -u vagrant ./c_simple_example diff --git a/src/rocksdb/build_tools/update_dependencies.sh b/src/rocksdb/build_tools/update_dependencies.sh new file mode 100755 index 00000000..b060544d --- /dev/null +++ b/src/rocksdb/build_tools/update_dependencies.sh @@ -0,0 +1,171 @@ +#!/usr/bin/env bash +# +# Update dependencies.sh file with the latest available versions + +BASEDIR=$(dirname $0) +OUTPUT="" + +function log_variable() +{ + echo "$1=${!1}" >> "$OUTPUT" +} + + +TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2" +## $1 => lib name +## $2 => lib version (if not provided, will try to pick latest) +## $3 => platform (if not provided, will try to pick latest gcc) +## +## get_lib_base will set a variable named ${LIB_NAME}_BASE to the lib location +function get_lib_base() +{ + local lib_name=$1 + local lib_version=$2 + local lib_platform=$3 + + local result="$TP2_LATEST/$lib_name/" + + # Lib Version + if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then + # version is not provided, use latest + result=`ls -dr1v $result/*/ | head -n1` + else + result="$result/$lib_version/" + fi + + # Lib Platform + if [ -z "$lib_platform" ]; then + # platform is not provided, use latest gcc + result=`ls -dr1v $result/gcc-*[^fb]/ | head -n1` + else + echo $lib_platform + result="$result/$lib_platform/" + fi + + result=`ls -1d $result/*/ | head -n1` + + # lib_name => LIB_NAME_BASE + local __res_var=${lib_name^^}"_BASE" + __res_var=`echo $__res_var | tr - _` + # LIB_NAME_BASE=$result + eval $__res_var=`readlink -f $result` + + log_variable $__res_var +} +
+###########################################################
+#                platform007 dependencies                 #
+###########################################################
+
+OUTPUT="$BASEDIR/dependencies_platform007.sh"
+
+rm -f "$OUTPUT"
+touch "$OUTPUT"
+
+echo "Writing dependencies to $OUTPUT"
+
+# Compilers locations
+GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/`
+CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
+
+log_variable GCC_BASE
+log_variable CLANG_BASE
+
+# Libraries locations
+get_lib_base libgcc 7.x platform007
+get_lib_base glibc 2.26 platform007
+get_lib_base snappy LATEST platform007
+get_lib_base zlib LATEST platform007
+get_lib_base bzip2 LATEST platform007
+get_lib_base lz4 LATEST platform007
+get_lib_base zstd LATEST platform007
+get_lib_base gflags LATEST platform007
+get_lib_base jemalloc LATEST platform007
+get_lib_base numa LATEST platform007
+get_lib_base libunwind LATEST platform007
+get_lib_base tbb LATEST platform007
+
+get_lib_base kernel-headers fb platform007
+get_lib_base binutils LATEST centos7-native
+get_lib_base valgrind LATEST platform007
+get_lib_base lua 5.3.4 platform007
+
+git diff "$OUTPUT"
+
+###########################################################
+#                    5.x dependencies                     #
+###########################################################
+
+OUTPUT="$BASEDIR/dependencies.sh"
+
+rm -f "$OUTPUT"
+touch "$OUTPUT"
+
+echo "Writing dependencies to $OUTPUT"
+
+# Compilers locations
+GCC_BASE=`readlink -f $TP2_LATEST/gcc/5.x/centos7-native/*/`
+CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
+
+log_variable GCC_BASE
+log_variable CLANG_BASE
+
+# Libraries locations
+get_lib_base libgcc 5.x gcc-5-glibc-2.23
+get_lib_base glibc 2.23 gcc-5-glibc-2.23
+get_lib_base snappy LATEST gcc-5-glibc-2.23
+get_lib_base zlib LATEST gcc-5-glibc-2.23
+get_lib_base bzip2 LATEST gcc-5-glibc-2.23
+get_lib_base lz4 LATEST gcc-5-glibc-2.23
+get_lib_base zstd LATEST gcc-5-glibc-2.23
+get_lib_base gflags LATEST gcc-5-glibc-2.23
+get_lib_base jemalloc LATEST gcc-5-glibc-2.23
+get_lib_base numa LATEST gcc-5-glibc-2.23
+get_lib_base libunwind LATEST gcc-5-glibc-2.23
+get_lib_base tbb LATEST gcc-5-glibc-2.23
+
+get_lib_base kernel-headers 4.0.9-36_fbk5_2933_gd092e3f gcc-5-glibc-2.23
+get_lib_base binutils LATEST centos7-native
+get_lib_base valgrind LATEST gcc-5-glibc-2.23
+get_lib_base lua 5.2.3 gcc-5-glibc-2.23
+
+git diff "$OUTPUT"
+
+###########################################################
+#                   4.8.1 dependencies                    #
+###########################################################
+
+OUTPUT="$BASEDIR/dependencies_4.8.1.sh"
+
+rm -f "$OUTPUT"
+touch "$OUTPUT"
+
+echo "Writing 4.8.1 dependencies to $OUTPUT"
+
+# Compilers locations
+GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
+CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/`
+
+log_variable GCC_BASE
+log_variable CLANG_BASE
+
+# Libraries locations
+get_lib_base libgcc 4.8.1 gcc-4.8.1-glibc-2.17
+get_lib_base glibc 2.17 gcc-4.8.1-glibc-2.17
+get_lib_base snappy LATEST gcc-4.8.1-glibc-2.17
+get_lib_base zlib LATEST gcc-4.8.1-glibc-2.17
+get_lib_base bzip2 LATEST gcc-4.8.1-glibc-2.17
+get_lib_base lz4 LATEST gcc-4.8.1-glibc-2.17
+get_lib_base zstd LATEST gcc-4.8.1-glibc-2.17
+get_lib_base gflags LATEST gcc-4.8.1-glibc-2.17
+get_lib_base jemalloc LATEST gcc-4.8.1-glibc-2.17
+get_lib_base numa LATEST gcc-4.8.1-glibc-2.17
+get_lib_base libunwind LATEST gcc-4.8.1-glibc-2.17
+get_lib_base tbb 4.0_update2 gcc-4.8.1-glibc-2.17
+
+get_lib_base kernel-headers LATEST gcc-4.8.1-glibc-2.17
+get_lib_base binutils LATEST centos6-native
+get_lib_base valgrind 3.8.1 gcc-4.8.1-glibc-2.17
+get_lib_base lua 5.2.3 centos6-native
+
+git diff "$OUTPUT"
diff --git a/src/rocksdb/build_tools/version.sh b/src/rocksdb/build_tools/version.sh
new file mode 100755
index 00000000..4e3b9f20
--- /dev/null
+++ b/src/rocksdb/build_tools/version.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Print one component of the RocksDB version (major, minor or patch), or the
+# full dotted version, as defined in include/rocksdb/version.h. The header is
+# opened by relative path, so run this from the directory containing include/.
+
+VERSION_HEADER=include/rocksdb/version.h
+
+# Print the usage line and exit with a failure status.
+usage() {
+  echo "Usage: $0 major|minor|patch|full"
+  exit 1
+}
+
+# Print the third field of the first line of the header that matches $1.
+first_define() {
+  grep "$1" "$VERSION_HEADER" | head -n1 | awk '{print $3}'
+}
+
+if [ "$#" = "0" ]; then
+  usage
+fi
+
+case "$1" in
+  major) first_define MAJOR ;;
+  minor) first_define MINOR ;;
+  patch) first_define PATCH ;;
+  full)
+    awk '/#define ROCKSDB/ { env[$2] = $3 }
+         END { printf "%s.%s.%s\n", env["ROCKSDB_MAJOR"],
+                                    env["ROCKSDB_MINOR"],
+                                    env["ROCKSDB_PATCH"] }' \
+        "$VERSION_HEADER"
+    ;;
+  *)
+    # Unknown selector: report it instead of printing nothing and exiting 0.
+    usage
+    ;;
+esac
|